Mercurial > repos > iuc > find_nested_alt_orfs
changeset 0:14f4c7a8a962 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/naltorfs/ commit cbedf7b5968b45a08df88d4ad799951d6f50a2bd"
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/find_nested_alt_orfs.xml Mon Apr 11 20:37:19 2022 +0000
@@ -0,0 +1,120 @@
+<tool id="find_nested_alt_orfs" name="Find Nested Alternate ORFs (nAlt-ORFs)" version="@TOOL_VERSION@" profile="20.05">
+    <description>from BED and 2bit/FASTA</description>
+    <!-- macros.xml supplies the @TOOL_VERSION@ token and the edam_ontology, requirements, translation_table_select_parameter and citations macros expanded below. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements">
+        <requirement type="package" version="377">ucsc-fatotwobit</requirement>
+    </expand>
+    <version_command>find_nested_alt_orfs.py --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Pick the 2bit reference: a history FASTA is first converted with faToTwoBit, a history 2bit dataset is used as is, and a cached genome uses the path from the data table.
+        #if $ref.ref_source == 'history':
+            #if $ref.ref_file.is_of_type('fasta'):
+                #set $twobit_filename = "out.2bit"
+                faToTwoBit '${ref.ref_file}' '$twobit_filename' -stripVersion &&
+            #else:
+                #set $twobit_filename = $ref.ref_file
+            #end if
+        #else:
+            #set $twobit_filename = $ref.ref_loc.fields.path
+        #end if
+        find_nested_alt_orfs.py
+        --twobit '$twobit_filename'
+
+        --min_length '$min_length'
+
+        --reference '${input.metadata.dbkey}'
+
+        --bed '$input'
+
+        ## The canonical CDS dataset is optional, so the flag is passed only when one was selected.
+        #if $no_canonical_cds:
+            --no_canonical_cds '$no_canonical_cds'
+        #end if
+
+        --translation_table '$translation_table'
+
+        --bed_out '$bed_out'
+
+        --peptide_fasta_out '$peptide_fasta_out'
+
+        --naltorfs_fasta_out '$naltorfs_fasta_out'
+
+        --cds_fasta_out '$cds_fasta_out'
+
+        $unique_sequences
+
+        ## The log output is filtered on write_log, so the flag is passed only when that box is checked.
+        #if $write_log:
+            --log '$log'
+        #end if
+    ]]></command>
+    <inputs>
+        <param argument="--bed" name="input" type="data" format="bed" label="Gene BED" help="A BED file with 12 columns"/>
+        <conditional name="ref">
+            <param name="ref_source" type="select" label="Source for Genomic Sequence Data">
+                <option value="cached">Locally cached genome</option>
+                <option value="history">History dataset genome</option>
+            </param>
+            <when value="cached">
+                <param argument="--twobit" name="ref_loc" type="select" label="Select reference genome">
+                    <options from_data_table="twobit"/>
+                </param>
+            </when>
+            <when value="history">
+                <param argument="--twobit" name="ref_file" type="data" format="twobit,fasta" label="Reference genome"/>
+            </when>
+        </conditional>
+        <param argument="--min_length" type="integer" value="150" min="1" label="Minimum length of peptide translation to report"/>
+        <param argument="--unique_sequences" type="boolean" truevalue="--unique_sequences" falsevalue="" checked="True" label="Only report the first unique occurrence of an alternate sequence"/>
+        <param argument="--no_canonical_cds" type="data" format="bed" optional="True" label="Do not report any alternate sequences that match a provided canonical CDS" help="A BED file with 12 columns"/>
+        <expand macro="translation_table_select_parameter"/>
+        <param name="write_log" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Create log file"/>
+    </inputs>
+    <outputs>
+        <!-- Every output label uses the same "Find nAlt-ORFs on ..." prefix; bed_out additionally gets its column names set as metadata. -->
+        <data name="bed_out" format="bed" label="Find nAlt-ORFs on ${on_string}: nAlt-ORFs.proBed">
+            <actions>
+                <action name="column_names" type="metadata" default="chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts,proteinAccession,peptideSequence,uniqueness,genomeReferenceVersion,psmScore,fdr,modifications,charge,expMassToCharge,calcMassToCharge,psmRank,datasetID,uri"/>
+            </actions>
+        </data>
+        <data name="peptide_fasta_out" format="fasta" label="Find nAlt-ORFs on ${on_string}: peptide.fasta"/>
+        <data name="naltorfs_fasta_out" format="fasta" label="Find nAlt-ORFs on ${on_string}: nAlt-ORFs.fasta"/>
+        <data name="cds_fasta_out" format="fasta" label="Find nAlt-ORFs on ${on_string}: canonical CDS.fasta"/>
+        <data name="log" format="txt" label="Find nAlt-ORFs on ${on_string}: log.txt">
+            <filter>write_log</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="gencode_canonical.hg38.chr6_GL0002508.bed" dbkey="hg38" ftype="bed12"/>
+            <conditional name="ref">
+                <param name="ref_source" value="history"/>
+                <param name="ref_file" value="hg38.chr6_GL000250v2_alt.2bit.gz" dbkey="hg38" ftype="twobit"/>
+            </conditional>
+            <param name="min_length" value="150"/>
+            <param name="unique_sequences" value="true"/>
+            <param name="no_canonical_cds" value="gencode_canonical.hg38.chr6_GL0002508.bed" dbkey="hg38" ftype="bed12"/>
+            <param name="translation_table" value="1"/>
+            <param name="write_log" value="true"/>
+            <output name="bed_out" value="bed_out.bed"/>
+            <output name="peptide_fasta_out" value="peptide_fasta_out.fasta"/>
+            <output name="naltorfs_fasta_out" value="naltorfs_fasta_out.fasta"/>
+            <output name="cds_fasta_out" value="cds_fasta_out.fasta"/>
+            <output name="log" value="find_naltorfs_log.txt"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Find Nested Alternate Open Reading Frames (nAlt-ORFs).
+
+Using a BED12 file containing the location of genes, and a matching reference genome,
+this tool searches in the alternate reading frames of the provided canonical coding sequence,
+and outputs nested alternate ORFs which match the provided thresholds.
+
+Translation table identifiers are based upon NCBI standards (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi).
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Apr 11 20:37:19 2022 +0000
@@ -0,0 +1,68 @@
+<macros>
+    <!-- Tool version; the naltorfs package requirement below is pinned to the same value. -->
+    <token name="@TOOL_VERSION@">0.1.2</token>
+    <!-- Package requirements; the yield slot receives any extra requirement elements nested in the expand call. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">naltorfs</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Select list for the translation_table argument; table 1 is preselected. -->
+    <xml name="translation_table_select_parameter">
+        <param argument="--translation_table" type="select" label="Translation table">
+            <option value="1" selected="true">Table 1 Standard, SGC0</option>
+            <option value="2">Table 2 Vertebrate Mitochondrial, SGC1</option>
+            <option value="3">Table 3 Yeast Mitochondrial, SGC2</option>
+            <option value="4">Table 4 Mold Mitochondrial, Protozoan Mitochondrial, Coelenterate Mitochondrial, Mycoplasma, Spiroplasma, SGC3</option>
+            <option value="5">Table 5 Invertebrate Mitochondrial, SGC4</option>
+            <option value="6">Table 6 Ciliate Nuclear, Dasycladacean Nuclear, Hexamita Nuclear, SGC5</option>
+            <option value="9">Table 9 Echinoderm Mitochondrial, Flatworm Mitochondrial, SGC8</option>
+            <option value="10">Table 10 Euplotid Nuclear, SGC9</option>
+            <option value="11">Table 11 Bacterial, Archaeal, Plant Plastid</option>
+            <option value="12">Table 12 Alternative Yeast Nuclear</option>
+            <option value="13">Table 13 Ascidian Mitochondrial</option>
+            <option value="14">Table 14 Alternative Flatworm Mitochondrial</option>
+            <option value="15">Table 15 Blepharisma Macronuclear</option>
+            <option value="16">Table 16 Chlorophycean Mitochondrial</option>
+            <option value="21">Table 21 Trematode Mitochondrial</option>
+            <option value="22">Table 22 Scenedesmus obliquus Mitochondrial</option>
+            <option value="23">Table 23 Thraustochytrium Mitochondrial</option>
+            <option value="24">Table 24 Pterobranchia Mitochondrial</option>
+            <option value="25">Table 25 Candidate Division SR1, Gracilibacteria</option>
+            <option value="26">Table 26 Pachysolen tannophilus Nuclear</option>
+            <option value="27">Table 27 Karyorelict Nuclear</option>
+            <option value="28">Table 28 Condylostoma Nuclear</option>
+            <option value="29">Table 29 Mesodinium Nuclear</option>
+            <option value="30">Table 30 Peritrich Nuclear</option>
+            <option value="31">Table 31 Blastocrithidia Nuclear</option>
+            <option value="32">Table 32 Balanophoraceae Plastid</option>
+            <option value="33">Table 33 Cephalodiscidae Mitochondrial</option>
+        </param>
+    </xml>
+    <!-- EDAM topic and operation annotations for the tool. -->
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_0622</edam_topic>
+            <edam_topic>topic_0091</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_0286</edam_operation>
+            <edam_operation>operation_0284</edam_operation>
+            <edam_operation>operation_0436</edam_operation>
+            <edam_operation>operation_0362</edam_operation>
+        </edam_operations>
+    </xml>
+    <!-- BibTeX entry for the unpublished nAlt-ORFs manuscript. -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @UNPUBLISHED{nAltORFs,
+                    author = "Kommireddy Vasu and Debjit Khan and Iyappan Ramachandiran and Daniel Blankenberg and Paul L. Fox",
+                    title = "Nested Alternate Open Reading Frames and their Encoded Proteins: The Hidden Orfeome",
+                    year = "2022"
+                }
+            </citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/aa_freq_from_bicodon.tabular Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,5 @@ +#frame aa_count * A C D E F G H I K L M N P Q R S T V W Y +2-counts 855 20 74 27 11 20 27 52 15 42 18 95 31 20 81 20 72 86 59 44 26 15 +2-percent 855 2.3391812865497074724885351316 8.6549707602339189804752095370 3.1578947368421053099041273526 1.2865497076023393319132992474 2.3391812865497074724885351316 3.1578947368421053099041273526 6.0818713450292394284701913421 1.7543859649122806043664013487 4.9122807017543861363151336263 2.1052631578947367252396816184 11.1111111111111107163651467999 3.6257309941520468044018343790 2.3391812865497074724885351316 9.4736842105263168178908017580 2.3391812865497074724885351316 8.4210526315789469009587264736 10.0584795321637425757899109158 6.9005847953216372658857835631 5.1461988304093573276531969896 3.0409356725146197142350956710 1.7543859649122806043664013487 +3-counts 855 42 62 55 22 41 15 71 28 12 6 69 7 6 104 42 70 91 42 27 28 15 +3-percent 855 4.9122807017543861363151336263 7.2514619883040936088036687579 6.4327485380116957713880765368 2.5730994152046786638265984948 4.7953216374269000965568920947 1.7543859649122806043664013487 8.3040935672514617493789046421 3.2748538011695909055731590342 1.4035087719298244834931210789 0.7017543859649122417465605395 8.0701754385964914462192609790 0.8187134502923977263932897586 0.7017543859649122417465605395 12.1637426900584788569403826841 4.9122807017543861363151336263 8.1871345029239765977990828105 10.6432748538011701100458594738 4.9122807017543861363151336263 3.1578947368421053099041273526 3.2748538011695909055731590342 1.7543859649122806043664013487
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bed_out.bed Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,4 @@ +chr6_GL000250v2_alt 938210 938801 ENST00000432222.1 0 - 938210 938801 12,12,120 1 591 0 ENST00000432222.1_f2_40_3 MAQNQASVLKNQAPVTRTQAPITGTLCQDARSNSHPVKPSRLNVFCCPHCSLTFSKKSYLSRHQKAHLTEPPNYCFHCSKSFSSFSRLVRHQQTHWKQKSYLCPICDLSFGEKEGLMDHWRGYKGKDLCQSSHHKCRVILGQWLGFSHDVPTMAGEEWKHGGDQSPPRIHTPRRRGLREKACKGDKTKEAVSILKHK unique hg38 . . . . . . . . . +chr6_GL000250v2_alt 2862985 2871389 ENST00000448800.5 0 - 2862985 2871389 12,12,120 8 14,148,145,110,132,119,184,78 0,342,616,859,2343,4930,6063,8326 ENST00000448800.5_f3_53_3 MCHTRELAFQISKEYERFSKYMPNVKVAVFFGGLSIKKDEEVLKKNCPHIVVGTPGRILALARNKSLNLKHIKHFILDECDKMLEQLDMRRDVQEIFRMTPHEKQVMMFSATLSKEIRPVCRKFMQDPMEIFVDDETKLTLHGLQQYYVKLKDNEKNRKLFDLLDVLEFNQVVIFVKSVQRCIALAQLLVEQNFPAIAIHRGMPQEERLSRYQQFKDFQRRILVATNLFGRGMDIERVNIAFNYDMPEDSDTYLHRVARAGRFGTKGLAITFVSDENDAKILNDVQDRFEVNISELPDEIDISSYIEQTR unique hg38 . . . . . . . . . +chr6_GL000250v2_alt 3481820 3484425 ENST00000445122.5 0 - 3481820 3484425 12,12,120 4 176,186,216,19 0,314,1145,2586 ENST00000445122.5_f3_80_1 MSSEKSGDSLRGPTSPAAARCRRPAPAGASPDCPGPWLRGAHARGDCGHAAAGGLRSARIPPAAAALHCLRAGLPGGHAICRRDPGGNRSDLHSPPAAPGPRAGPTGAEAPATRLHAHRGADHHLLLLAYWHHCHLQGRAGAHGLGPRRHGVGRDRFTRGPELLLHLPGRGHRGHGALYHPHRSHHHRRAAPRELLGSL unique hg38 . . . . . . . . . +chr6_GL000250v2_alt 3503057 3504078 ENST00000430777.2 0 - 3503057 3504078 12,12,120 3 145,134,180 0,481,841 ENST00000430777.2_f3_20_373 MDAAAAALPAAALPAAHPVVLQPQCQVLLQDGLLQWLDPLPGCARHPCVCRARTQRREHEDLASNAAPHQIPVRDPSGGARGSPLPSLAALCCCLQPPELSRSAWDDGGTARPLCAHCQARATVGWLCRAGLLAGRSHLHRPEAHGGCHQCHV unique hg38 . . . . . . . . .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bicodon_out.tabular Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,2 @@ +Division Assembly Taxid Species Organelle Translation Table # CDS # Codon Pairs aaaaaa aaaaac aaaaag aaaaat aaaaca aaaacc aaaacg aaaact aaaaga aaaagc aaaagg aaaagt aaaata aaaatc aaaatg aaaatt aaacaa aaacac aaacag aaacat aaacca aaaccc aaaccg aaacct aaacga aaacgc aaacgg aaacgt aaacta aaactc aaactg aaactt aaagaa aaagac aaagag aaagat aaagca aaagcc aaagcg aaagct aaagga aaaggc aaaggg aaaggt aaagta aaagtc aaagtg aaagtt aaataa aaatac aaatag aaatat aaatca aaatcc aaatcg aaatct aaatga aaatgc aaatgg aaatgt aaatta aaattc aaattg aaattt aacaaa aacaac aacaag aacaat aacaca aacacc aacacg aacact aacaga aacagc aacagg aacagt aacata aacatc aacatg aacatt aaccaa aaccac aaccag aaccat aaccca aacccc aacccg aaccct aaccga aaccgc aaccgg aaccgt aaccta aacctc aacctg aacctt aacgaa aacgac aacgag aacgat aacgca aacgcc aacgcg aacgct aacgga aacggc aacggg aacggt aacgta aacgtc aacgtg aacgtt aactaa aactac aactag aactat aactca aactcc aactcg aactct aactga aactgc aactgg aactgt aactta aacttc aacttg aacttt aagaaa aagaac aagaag aagaat aagaca aagacc aagacg aagact aagaga aagagc aagagg aagagt aagata aagatc aagatg aagatt aagcaa aagcac aagcag aagcat aagcca aagccc aagccg aagcct aagcga aagcgc aagcgg aagcgt aagcta aagctc aagctg aagctt aaggaa aaggac aaggag aaggat aaggca aaggcc aaggcg aaggct aaggga aagggc aagggg aagggt aaggta aaggtc aaggtg aaggtt aagtaa aagtac aagtag aagtat aagtca aagtcc aagtcg aagtct aagtga aagtgc aagtgg aagtgt aagtta aagttc aagttg aagttt aataaa aataac aataag aataat aataca aatacc aatacg aatact aataga aatagc aatagg aatagt aatata aatatc aatatg aatatt aatcaa aatcac aatcag aatcat aatcca aatccc aatccg aatcct aatcga aatcgc aatcgg aatcgt aatcta aatctc aatctg aatctt aatgaa aatgac aatgag aatgat aatgca aatgcc aatgcg aatgct aatgga aatggc aatggg aatggt aatgta aatgtc aatgtg aatgtt aattaa aattac aattag aattat aattca aattcc aattcg aattct aattga aattgc aattgg aattgt aattta aatttc aatttg 
aatttt acaaaa acaaac acaaag acaaat acaaca acaacc acaacg acaact acaaga acaagc acaagg acaagt acaata acaatc acaatg acaatt acacaa acacac acacag acacat acacca acaccc acaccg acacct acacga acacgc acacgg acacgt acacta acactc acactg acactt acagaa acagac acagag acagat acagca acagcc acagcg acagct acagga acaggc acaggg acaggt acagta acagtc acagtg acagtt acataa acatac acatag acatat acatca acatcc acatcg acatct acatga acatgc acatgg acatgt acatta acattc acattg acattt accaaa accaac accaag accaat accaca accacc accacg accact accaga accagc accagg accagt accata accatc accatg accatt acccaa acccac acccag acccat acccca accccc accccg acccct acccga acccgc acccgg acccgt acccta accctc accctg accctt accgaa accgac accgag accgat accgca accgcc accgcg accgct accgga accggc accggg accggt accgta accgtc accgtg accgtt acctaa acctac acctag acctat acctca acctcc acctcg acctct acctga acctgc acctgg acctgt acctta accttc accttg accttt acgaaa acgaac acgaag acgaat acgaca acgacc acgacg acgact acgaga acgagc acgagg acgagt acgata acgatc acgatg acgatt acgcaa acgcac acgcag acgcat acgcca acgccc acgccg acgcct acgcga acgcgc acgcgg acgcgt acgcta acgctc acgctg acgctt acggaa acggac acggag acggat acggca acggcc acggcg acggct acggga acgggc acgggg acgggt acggta acggtc acggtg acggtt acgtaa acgtac acgtag acgtat acgtca acgtcc acgtcg acgtct acgtga acgtgc acgtgg acgtgt acgtta acgttc acgttg acgttt actaaa actaac actaag actaat actaca actacc actacg actact actaga actagc actagg actagt actata actatc actatg actatt actcaa actcac actcag actcat actcca actccc actccg actcct actcga actcgc actcgg actcgt actcta actctc actctg actctt actgaa actgac actgag actgat actgca actgcc actgcg actgct actgga actggc actggg actggt actgta actgtc actgtg actgtt acttaa acttac acttag acttat acttca acttcc acttcg acttct acttga acttgc acttgg acttgt acttta actttc actttg actttt agaaaa agaaac agaaag agaaat agaaca agaacc agaacg agaact agaaga agaagc agaagg agaagt agaata agaatc agaatg agaatt agacaa agacac agacag agacat agacca agaccc agaccg agacct agacga agacgc agacgg agacgt 
agacta agactc agactg agactt agagaa agagac agagag agagat agagca agagcc agagcg agagct agagga agaggc agaggg agaggt agagta agagtc agagtg agagtt agataa agatac agatag agatat agatca agatcc agatcg agatct agatga agatgc agatgg agatgt agatta agattc agattg agattt agcaaa agcaac agcaag agcaat agcaca agcacc agcacg agcact agcaga agcagc agcagg agcagt agcata agcatc agcatg agcatt agccaa agccac agccag agccat agccca agcccc agcccg agccct agccga agccgc agccgg agccgt agccta agcctc agcctg agcctt agcgaa agcgac agcgag agcgat agcgca agcgcc agcgcg agcgct agcgga agcggc agcggg agcggt agcgta agcgtc agcgtg agcgtt agctaa agctac agctag agctat agctca agctcc agctcg agctct agctga agctgc agctgg agctgt agctta agcttc agcttg agcttt aggaaa aggaac aggaag aggaat aggaca aggacc aggacg aggact aggaga aggagc aggagg aggagt aggata aggatc aggatg aggatt aggcaa aggcac aggcag aggcat aggcca aggccc aggccg aggcct aggcga aggcgc aggcgg aggcgt aggcta aggctc aggctg aggctt agggaa agggac agggag agggat agggca agggcc agggcg agggct agggga aggggc aggggg aggggt agggta agggtc agggtg agggtt aggtaa aggtac aggtag aggtat aggtca aggtcc aggtcg aggtct aggtga aggtgc aggtgg aggtgt aggtta aggttc aggttg aggttt agtaaa agtaac agtaag agtaat agtaca agtacc agtacg agtact agtaga agtagc agtagg agtagt agtata agtatc agtatg agtatt agtcaa agtcac agtcag agtcat agtcca agtccc agtccg agtcct agtcga agtcgc agtcgg agtcgt agtcta agtctc agtctg agtctt agtgaa agtgac agtgag agtgat agtgca agtgcc agtgcg agtgct agtgga agtggc agtggg agtggt agtgta agtgtc agtgtg agtgtt agttaa agttac agttag agttat agttca agttcc agttcg agttct agttga agttgc agttgg agttgt agttta agtttc agtttg agtttt ataaaa ataaac ataaag ataaat ataaca ataacc ataacg ataact ataaga ataagc ataagg ataagt ataata ataatc ataatg ataatt atacaa atacac atacag atacat atacca ataccc ataccg atacct atacga atacgc atacgg atacgt atacta atactc atactg atactt atagaa atagac atagag atagat atagca atagcc atagcg atagct atagga ataggc ataggg ataggt atagta atagtc atagtg atagtt atataa atatac atatag atatat atatca atatcc atatcg atatct atatga 
atatgc atatgg atatgt atatta atattc atattg atattt atcaaa atcaac atcaag atcaat atcaca atcacc atcacg atcact atcaga atcagc atcagg atcagt atcata atcatc atcatg atcatt atccaa atccac atccag atccat atccca atcccc atcccg atccct atccga atccgc atccgg atccgt atccta atcctc atcctg atcctt atcgaa atcgac atcgag atcgat atcgca atcgcc atcgcg atcgct atcgga atcggc atcggg atcggt atcgta atcgtc atcgtg atcgtt atctaa atctac atctag atctat atctca atctcc atctcg atctct atctga atctgc atctgg atctgt atctta atcttc atcttg atcttt atgaaa atgaac atgaag atgaat atgaca atgacc atgacg atgact atgaga atgagc atgagg atgagt atgata atgatc atgatg atgatt atgcaa atgcac atgcag atgcat atgcca atgccc atgccg atgcct atgcga atgcgc atgcgg atgcgt atgcta atgctc atgctg atgctt atggaa atggac atggag atggat atggca atggcc atggcg atggct atggga atgggc atgggg atgggt atggta atggtc atggtg atggtt atgtaa atgtac atgtag atgtat atgtca atgtcc atgtcg atgtct atgtga atgtgc atgtgg atgtgt atgtta atgttc atgttg atgttt attaaa attaac attaag attaat attaca attacc attacg attact attaga attagc attagg attagt attata attatc attatg attatt attcaa attcac attcag attcat attcca attccc attccg attcct attcga attcgc attcgg attcgt attcta attctc attctg attctt attgaa attgac attgag attgat attgca attgcc attgcg attgct attgga attggc attggg attggt attgta attgtc attgtg attgtt atttaa atttac atttag atttat atttca atttcc atttcg atttct atttga atttgc atttgg atttgt atttta attttc attttg attttt caaaaa caaaac caaaag caaaat caaaca caaacc caaacg caaact caaaga caaagc caaagg caaagt caaata caaatc caaatg caaatt caacaa caacac caacag caacat caacca caaccc caaccg caacct caacga caacgc caacgg caacgt caacta caactc caactg caactt caagaa caagac caagag caagat caagca caagcc caagcg caagct caagga caaggc caaggg caaggt caagta caagtc caagtg caagtt caataa caatac caatag caatat caatca caatcc caatcg caatct caatga caatgc caatgg caatgt caatta caattc caattg caattt cacaaa cacaac cacaag cacaat cacaca cacacc cacacg cacact cacaga cacagc cacagg cacagt cacata cacatc cacatg cacatt caccaa caccac caccag caccat caccca cacccc 
cacccg caccct caccga caccgc caccgg caccgt caccta cacctc cacctg cacctt cacgaa cacgac cacgag cacgat cacgca cacgcc cacgcg cacgct cacgga cacggc cacggg cacggt cacgta cacgtc cacgtg cacgtt cactaa cactac cactag cactat cactca cactcc cactcg cactct cactga cactgc cactgg cactgt cactta cacttc cacttg cacttt cagaaa cagaac cagaag cagaat cagaca cagacc cagacg cagact cagaga cagagc cagagg cagagt cagata cagatc cagatg cagatt cagcaa cagcac cagcag cagcat cagcca cagccc cagccg cagcct cagcga cagcgc cagcgg cagcgt cagcta cagctc cagctg cagctt caggaa caggac caggag caggat caggca caggcc caggcg caggct caggga cagggc cagggg cagggt caggta caggtc caggtg caggtt cagtaa cagtac cagtag cagtat cagtca cagtcc cagtcg cagtct cagtga cagtgc cagtgg cagtgt cagtta cagttc cagttg cagttt cataaa cataac cataag cataat cataca catacc catacg catact cataga catagc catagg catagt catata catatc catatg catatt catcaa catcac catcag catcat catcca catccc catccg catcct catcga catcgc catcgg catcgt catcta catctc catctg catctt catgaa catgac catgag catgat catgca catgcc catgcg catgct catgga catggc catggg catggt catgta catgtc catgtg catgtt cattaa cattac cattag cattat cattca cattcc cattcg cattct cattga cattgc cattgg cattgt cattta catttc catttg catttt ccaaaa ccaaac ccaaag ccaaat ccaaca ccaacc ccaacg ccaact ccaaga ccaagc ccaagg ccaagt ccaata ccaatc ccaatg ccaatt ccacaa ccacac ccacag ccacat ccacca ccaccc ccaccg ccacct ccacga ccacgc ccacgg ccacgt ccacta ccactc ccactg ccactt ccagaa ccagac ccagag ccagat ccagca ccagcc ccagcg ccagct ccagga ccaggc ccaggg ccaggt ccagta ccagtc ccagtg ccagtt ccataa ccatac ccatag ccatat ccatca ccatcc ccatcg ccatct ccatga ccatgc ccatgg ccatgt ccatta ccattc ccattg ccattt cccaaa cccaac cccaag cccaat cccaca cccacc cccacg cccact cccaga cccagc cccagg cccagt cccata cccatc cccatg cccatt ccccaa ccccac ccccag ccccat ccccca cccccc cccccg ccccct ccccga ccccgc ccccgg ccccgt ccccta cccctc cccctg cccctt cccgaa cccgac cccgag cccgat cccgca cccgcc cccgcg cccgct cccgga cccggc cccggg cccggt cccgta cccgtc cccgtg cccgtt ccctaa ccctac ccctag 
ccctat ccctca ccctcc ccctcg ccctct ccctga ccctgc ccctgg ccctgt ccctta cccttc cccttg cccttt ccgaaa ccgaac ccgaag ccgaat ccgaca ccgacc ccgacg ccgact ccgaga ccgagc ccgagg ccgagt ccgata ccgatc ccgatg ccgatt ccgcaa ccgcac ccgcag ccgcat ccgcca ccgccc ccgccg ccgcct ccgcga ccgcgc ccgcgg ccgcgt ccgcta ccgctc ccgctg ccgctt ccggaa ccggac ccggag ccggat ccggca ccggcc ccggcg ccggct ccggga ccgggc ccgggg ccgggt ccggta ccggtc ccggtg ccggtt ccgtaa ccgtac ccgtag ccgtat ccgtca ccgtcc ccgtcg ccgtct ccgtga ccgtgc ccgtgg ccgtgt ccgtta ccgttc ccgttg ccgttt cctaaa cctaac cctaag cctaat cctaca cctacc cctacg cctact cctaga cctagc cctagg cctagt cctata cctatc cctatg cctatt cctcaa cctcac cctcag cctcat cctcca cctccc cctccg cctcct cctcga cctcgc cctcgg cctcgt cctcta cctctc cctctg cctctt cctgaa cctgac cctgag cctgat cctgca cctgcc cctgcg cctgct cctgga cctggc cctggg cctggt cctgta cctgtc cctgtg cctgtt ccttaa ccttac ccttag ccttat ccttca ccttcc ccttcg ccttct ccttga ccttgc ccttgg ccttgt ccttta cctttc cctttg cctttt cgaaaa cgaaac cgaaag cgaaat cgaaca cgaacc cgaacg cgaact cgaaga cgaagc cgaagg cgaagt cgaata cgaatc cgaatg cgaatt cgacaa cgacac cgacag cgacat cgacca cgaccc cgaccg cgacct cgacga cgacgc cgacgg cgacgt cgacta cgactc cgactg cgactt cgagaa cgagac cgagag cgagat cgagca cgagcc cgagcg cgagct cgagga cgaggc cgaggg cgaggt cgagta cgagtc cgagtg cgagtt cgataa cgatac cgatag cgatat cgatca cgatcc cgatcg cgatct cgatga cgatgc cgatgg cgatgt cgatta cgattc cgattg cgattt cgcaaa cgcaac cgcaag cgcaat cgcaca cgcacc cgcacg cgcact cgcaga cgcagc cgcagg cgcagt cgcata cgcatc cgcatg cgcatt cgccaa cgccac cgccag cgccat cgccca cgcccc cgcccg cgccct cgccga cgccgc cgccgg cgccgt cgccta cgcctc cgcctg cgcctt cgcgaa cgcgac cgcgag cgcgat cgcgca cgcgcc cgcgcg cgcgct cgcgga cgcggc cgcggg cgcggt cgcgta cgcgtc cgcgtg cgcgtt cgctaa cgctac cgctag cgctat cgctca cgctcc cgctcg cgctct cgctga cgctgc cgctgg cgctgt cgctta cgcttc cgcttg cgcttt cggaaa cggaac cggaag cggaat cggaca cggacc cggacg cggact cggaga cggagc cggagg cggagt cggata cggatc cggatg cggatt 
cggcaa cggcac cggcag cggcat cggcca cggccc cggccg cggcct cggcga cggcgc cggcgg cggcgt cggcta cggctc cggctg cggctt cgggaa cgggac cgggag cgggat cgggca cgggcc cgggcg cgggct cgggga cggggc cggggg cggggt cgggta cgggtc cgggtg cgggtt cggtaa cggtac cggtag cggtat cggtca cggtcc cggtcg cggtct cggtga cggtgc cggtgg cggtgt cggtta cggttc cggttg cggttt cgtaaa cgtaac cgtaag cgtaat cgtaca cgtacc cgtacg cgtact cgtaga cgtagc cgtagg cgtagt cgtata cgtatc cgtatg cgtatt cgtcaa cgtcac cgtcag cgtcat cgtcca cgtccc cgtccg cgtcct cgtcga cgtcgc cgtcgg cgtcgt cgtcta cgtctc cgtctg cgtctt cgtgaa cgtgac cgtgag cgtgat cgtgca cgtgcc cgtgcg cgtgct cgtgga cgtggc cgtggg cgtggt cgtgta cgtgtc cgtgtg cgtgtt cgttaa cgttac cgttag cgttat cgttca cgttcc cgttcg cgttct cgttga cgttgc cgttgg cgttgt cgttta cgtttc cgtttg cgtttt ctaaaa ctaaac ctaaag ctaaat ctaaca ctaacc ctaacg ctaact ctaaga ctaagc ctaagg ctaagt ctaata ctaatc ctaatg ctaatt ctacaa ctacac ctacag ctacat ctacca ctaccc ctaccg ctacct ctacga ctacgc ctacgg ctacgt ctacta ctactc ctactg ctactt ctagaa ctagac ctagag ctagat ctagca ctagcc ctagcg ctagct ctagga ctaggc ctaggg ctaggt ctagta ctagtc ctagtg ctagtt ctataa ctatac ctatag ctatat ctatca ctatcc ctatcg ctatct ctatga ctatgc ctatgg ctatgt ctatta ctattc ctattg ctattt ctcaaa ctcaac ctcaag ctcaat ctcaca ctcacc ctcacg ctcact ctcaga ctcagc ctcagg ctcagt ctcata ctcatc ctcatg ctcatt ctccaa ctccac ctccag ctccat ctccca ctcccc ctcccg ctccct ctccga ctccgc ctccgg ctccgt ctccta ctcctc ctcctg ctcctt ctcgaa ctcgac ctcgag ctcgat ctcgca ctcgcc ctcgcg ctcgct ctcgga ctcggc ctcggg ctcggt ctcgta ctcgtc ctcgtg ctcgtt ctctaa ctctac ctctag ctctat ctctca ctctcc ctctcg ctctct ctctga ctctgc ctctgg ctctgt ctctta ctcttc ctcttg ctcttt ctgaaa ctgaac ctgaag ctgaat ctgaca ctgacc ctgacg ctgact ctgaga ctgagc ctgagg ctgagt ctgata ctgatc ctgatg ctgatt ctgcaa ctgcac ctgcag ctgcat ctgcca ctgccc ctgccg ctgcct ctgcga ctgcgc ctgcgg ctgcgt ctgcta ctgctc ctgctg ctgctt ctggaa ctggac ctggag ctggat ctggca ctggcc ctggcg ctggct ctggga ctgggc ctgggg ctgggt ctggta 
ctggtc ctggtg ctggtt ctgtaa ctgtac ctgtag ctgtat ctgtca ctgtcc ctgtcg ctgtct ctgtga ctgtgc ctgtgg ctgtgt ctgtta ctgttc ctgttg ctgttt cttaaa cttaac cttaag cttaat cttaca cttacc cttacg cttact cttaga cttagc cttagg cttagt cttata cttatc cttatg cttatt cttcaa cttcac cttcag cttcat cttcca cttccc cttccg cttcct cttcga cttcgc cttcgg cttcgt cttcta cttctc cttctg cttctt cttgaa cttgac cttgag cttgat cttgca cttgcc cttgcg cttgct cttgga cttggc cttggg cttggt cttgta cttgtc cttgtg cttgtt ctttaa ctttac ctttag ctttat ctttca ctttcc ctttcg ctttct ctttga ctttgc ctttgg ctttgt ctttta cttttc cttttg cttttt gaaaaa gaaaac gaaaag gaaaat gaaaca gaaacc gaaacg gaaact gaaaga gaaagc gaaagg gaaagt gaaata gaaatc gaaatg gaaatt gaacaa gaacac gaacag gaacat gaacca gaaccc gaaccg gaacct gaacga gaacgc gaacgg gaacgt gaacta gaactc gaactg gaactt gaagaa gaagac gaagag gaagat gaagca gaagcc gaagcg gaagct gaagga gaaggc gaaggg gaaggt gaagta gaagtc gaagtg gaagtt gaataa gaatac gaatag gaatat gaatca gaatcc gaatcg gaatct gaatga gaatgc gaatgg gaatgt gaatta gaattc gaattg gaattt gacaaa gacaac gacaag gacaat gacaca gacacc gacacg gacact gacaga gacagc gacagg gacagt gacata gacatc gacatg gacatt gaccaa gaccac gaccag gaccat gaccca gacccc gacccg gaccct gaccga gaccgc gaccgg gaccgt gaccta gacctc gacctg gacctt gacgaa gacgac gacgag gacgat gacgca gacgcc gacgcg gacgct gacgga gacggc gacggg gacggt gacgta gacgtc gacgtg gacgtt gactaa gactac gactag gactat gactca gactcc gactcg gactct gactga gactgc gactgg gactgt gactta gacttc gacttg gacttt gagaaa gagaac gagaag gagaat gagaca gagacc gagacg gagact gagaga gagagc gagagg gagagt gagata gagatc gagatg gagatt gagcaa gagcac gagcag gagcat gagcca gagccc gagccg gagcct gagcga gagcgc gagcgg gagcgt gagcta gagctc gagctg gagctt gaggaa gaggac gaggag gaggat gaggca gaggcc gaggcg gaggct gaggga gagggc gagggg gagggt gaggta gaggtc gaggtg gaggtt gagtaa gagtac gagtag gagtat gagtca gagtcc gagtcg gagtct gagtga gagtgc gagtgg gagtgt gagtta gagttc gagttg gagttt gataaa gataac gataag gataat gataca gatacc gatacg gatact gataga gatagc 
gatagg gatagt gatata gatatc gatatg gatatt gatcaa gatcac gatcag gatcat gatcca gatccc gatccg gatcct gatcga gatcgc gatcgg gatcgt gatcta gatctc gatctg gatctt gatgaa gatgac gatgag gatgat gatgca gatgcc gatgcg gatgct gatgga gatggc gatggg gatggt gatgta gatgtc gatgtg gatgtt gattaa gattac gattag gattat gattca gattcc gattcg gattct gattga gattgc gattgg gattgt gattta gatttc gatttg gatttt gcaaaa gcaaac gcaaag gcaaat gcaaca gcaacc gcaacg gcaact gcaaga gcaagc gcaagg gcaagt gcaata gcaatc gcaatg gcaatt gcacaa gcacac gcacag gcacat gcacca gcaccc gcaccg gcacct gcacga gcacgc gcacgg gcacgt gcacta gcactc gcactg gcactt gcagaa gcagac gcagag gcagat gcagca gcagcc gcagcg gcagct gcagga gcaggc gcaggg gcaggt gcagta gcagtc gcagtg gcagtt gcataa gcatac gcatag gcatat gcatca gcatcc gcatcg gcatct gcatga gcatgc gcatgg gcatgt gcatta gcattc gcattg gcattt gccaaa gccaac gccaag gccaat gccaca gccacc gccacg gccact gccaga gccagc gccagg gccagt gccata gccatc gccatg gccatt gcccaa gcccac gcccag gcccat gcccca gccccc gccccg gcccct gcccga gcccgc gcccgg gcccgt gcccta gccctc gccctg gccctt gccgaa gccgac gccgag gccgat gccgca gccgcc gccgcg gccgct gccgga gccggc gccggg gccggt gccgta gccgtc gccgtg gccgtt gcctaa gcctac gcctag gcctat gcctca gcctcc gcctcg gcctct gcctga gcctgc gcctgg gcctgt gcctta gccttc gccttg gccttt gcgaaa gcgaac gcgaag gcgaat gcgaca gcgacc gcgacg gcgact gcgaga gcgagc gcgagg gcgagt gcgata gcgatc gcgatg gcgatt gcgcaa gcgcac gcgcag gcgcat gcgcca gcgccc gcgccg gcgcct gcgcga gcgcgc gcgcgg gcgcgt gcgcta gcgctc gcgctg gcgctt gcggaa gcggac gcggag gcggat gcggca gcggcc gcggcg gcggct gcggga gcgggc gcgggg gcgggt gcggta gcggtc gcggtg gcggtt gcgtaa gcgtac gcgtag gcgtat gcgtca gcgtcc gcgtcg gcgtct gcgtga gcgtgc gcgtgg gcgtgt gcgtta gcgttc gcgttg gcgttt gctaaa gctaac gctaag gctaat gctaca gctacc gctacg gctact gctaga gctagc gctagg gctagt gctata gctatc gctatg gctatt gctcaa gctcac gctcag gctcat gctcca gctccc gctccg gctcct gctcga gctcgc gctcgg gctcgt gctcta gctctc gctctg gctctt gctgaa gctgac gctgag gctgat gctgca gctgcc gctgcg 
gctgct gctgga gctggc gctggg gctggt gctgta gctgtc gctgtg gctgtt gcttaa gcttac gcttag gcttat gcttca gcttcc gcttcg gcttct gcttga gcttgc gcttgg gcttgt gcttta gctttc gctttg gctttt ggaaaa ggaaac ggaaag ggaaat ggaaca ggaacc ggaacg ggaact ggaaga ggaagc ggaagg ggaagt ggaata ggaatc ggaatg ggaatt ggacaa ggacac ggacag ggacat ggacca ggaccc ggaccg ggacct ggacga ggacgc ggacgg ggacgt ggacta ggactc ggactg ggactt ggagaa ggagac ggagag ggagat ggagca ggagcc ggagcg ggagct ggagga ggaggc ggaggg ggaggt ggagta ggagtc ggagtg ggagtt ggataa ggatac ggatag ggatat ggatca ggatcc ggatcg ggatct ggatga ggatgc ggatgg ggatgt ggatta ggattc ggattg ggattt ggcaaa ggcaac ggcaag ggcaat ggcaca ggcacc ggcacg ggcact ggcaga ggcagc ggcagg ggcagt ggcata ggcatc ggcatg ggcatt ggccaa ggccac ggccag ggccat ggccca ggcccc ggcccg ggccct ggccga ggccgc ggccgg ggccgt ggccta ggcctc ggcctg ggcctt ggcgaa ggcgac ggcgag ggcgat ggcgca ggcgcc ggcgcg ggcgct ggcgga ggcggc ggcggg ggcggt ggcgta ggcgtc ggcgtg ggcgtt ggctaa ggctac ggctag ggctat ggctca ggctcc ggctcg ggctct ggctga ggctgc ggctgg ggctgt ggctta ggcttc ggcttg ggcttt gggaaa gggaac gggaag gggaat gggaca gggacc gggacg gggact gggaga gggagc gggagg gggagt gggata gggatc gggatg gggatt gggcaa gggcac gggcag gggcat gggcca gggccc gggccg gggcct gggcga gggcgc gggcgg gggcgt gggcta gggctc gggctg gggctt ggggaa ggggac ggggag ggggat ggggca ggggcc ggggcg ggggct ggggga gggggc gggggg gggggt ggggta ggggtc ggggtg ggggtt gggtaa gggtac gggtag gggtat gggtca gggtcc gggtcg gggtct gggtga gggtgc gggtgg gggtgt gggtta gggttc gggttg gggttt ggtaaa ggtaac ggtaag ggtaat ggtaca ggtacc ggtacg ggtact ggtaga ggtagc ggtagg ggtagt ggtata ggtatc ggtatg ggtatt ggtcaa ggtcac ggtcag ggtcat ggtcca ggtccc ggtccg ggtcct ggtcga ggtcgc ggtcgg ggtcgt ggtcta ggtctc ggtctg ggtctt ggtgaa ggtgac ggtgag ggtgat ggtgca ggtgcc ggtgcg ggtgct ggtgga ggtggc ggtggg ggtggt ggtgta ggtgtc ggtgtg ggtgtt ggttaa ggttac ggttag ggttat ggttca ggttcc ggttcg ggttct ggttga ggttgc ggttgg ggttgt ggttta ggtttc ggtttg ggtttt gtaaaa gtaaac gtaaag gtaaat 
gtaaca gtaacc gtaacg gtaact gtaaga gtaagc gtaagg gtaagt gtaata gtaatc gtaatg gtaatt gtacaa gtacac gtacag gtacat gtacca gtaccc gtaccg gtacct gtacga gtacgc gtacgg gtacgt gtacta gtactc gtactg gtactt gtagaa gtagac gtagag gtagat gtagca gtagcc gtagcg gtagct gtagga gtaggc gtaggg gtaggt gtagta gtagtc gtagtg gtagtt gtataa gtatac gtatag gtatat gtatca gtatcc gtatcg gtatct gtatga gtatgc gtatgg gtatgt gtatta gtattc gtattg gtattt gtcaaa gtcaac gtcaag gtcaat gtcaca gtcacc gtcacg gtcact gtcaga gtcagc gtcagg gtcagt gtcata gtcatc gtcatg gtcatt gtccaa gtccac gtccag gtccat gtccca gtcccc gtcccg gtccct gtccga gtccgc gtccgg gtccgt gtccta gtcctc gtcctg gtcctt gtcgaa gtcgac gtcgag gtcgat gtcgca gtcgcc gtcgcg gtcgct gtcgga gtcggc gtcggg gtcggt gtcgta gtcgtc gtcgtg gtcgtt gtctaa gtctac gtctag gtctat gtctca gtctcc gtctcg gtctct gtctga gtctgc gtctgg gtctgt gtctta gtcttc gtcttg gtcttt gtgaaa gtgaac gtgaag gtgaat gtgaca gtgacc gtgacg gtgact gtgaga gtgagc gtgagg gtgagt gtgata gtgatc gtgatg gtgatt gtgcaa gtgcac gtgcag gtgcat gtgcca gtgccc gtgccg gtgcct gtgcga gtgcgc gtgcgg gtgcgt gtgcta gtgctc gtgctg gtgctt gtggaa gtggac gtggag gtggat gtggca gtggcc gtggcg gtggct gtggga gtgggc gtgggg gtgggt gtggta gtggtc gtggtg gtggtt gtgtaa gtgtac gtgtag gtgtat gtgtca gtgtcc gtgtcg gtgtct gtgtga gtgtgc gtgtgg gtgtgt gtgtta gtgttc gtgttg gtgttt gttaaa gttaac gttaag gttaat gttaca gttacc gttacg gttact gttaga gttagc gttagg gttagt gttata gttatc gttatg gttatt gttcaa gttcac gttcag gttcat gttcca gttccc gttccg gttcct gttcga gttcgc gttcgg gttcgt gttcta gttctc gttctg gttctt gttgaa gttgac gttgag gttgat gttgca gttgcc gttgcg gttgct gttgga gttggc gttggg gttggt gttgta gttgtc gttgtg gttgtt gtttaa gtttac gtttag gtttat gtttca gtttcc gtttcg gtttct gtttga gtttgc gtttgg gtttgt gtttta gttttc gttttg gttttt taaaaa taaaac taaaag taaaat taaaca taaacc taaacg taaact taaaga taaagc taaagg taaagt taaata taaatc taaatg taaatt taacaa taacac taacag taacat taacca taaccc taaccg taacct taacga taacgc taacgg taacgt taacta taactc taactg taactt taagaa 
taagac taagag taagat taagca taagcc taagcg taagct taagga taaggc taaggg taaggt taagta taagtc taagtg taagtt taataa taatac taatag taatat taatca taatcc taatcg taatct taatga taatgc taatgg taatgt taatta taattc taattg taattt tacaaa tacaac tacaag tacaat tacaca tacacc tacacg tacact tacaga tacagc tacagg tacagt tacata tacatc tacatg tacatt taccaa taccac taccag taccat taccca tacccc tacccg taccct taccga taccgc taccgg taccgt taccta tacctc tacctg tacctt tacgaa tacgac tacgag tacgat tacgca tacgcc tacgcg tacgct tacgga tacggc tacggg tacggt tacgta tacgtc tacgtg tacgtt tactaa tactac tactag tactat tactca tactcc tactcg tactct tactga tactgc tactgg tactgt tactta tacttc tacttg tacttt tagaaa tagaac tagaag tagaat tagaca tagacc tagacg tagact tagaga tagagc tagagg tagagt tagata tagatc tagatg tagatt tagcaa tagcac tagcag tagcat tagcca tagccc tagccg tagcct tagcga tagcgc tagcgg tagcgt tagcta tagctc tagctg tagctt taggaa taggac taggag taggat taggca taggcc taggcg taggct taggga tagggc tagggg tagggt taggta taggtc taggtg taggtt tagtaa tagtac tagtag tagtat tagtca tagtcc tagtcg tagtct tagtga tagtgc tagtgg tagtgt tagtta tagttc tagttg tagttt tataaa tataac tataag tataat tataca tatacc tatacg tatact tataga tatagc tatagg tatagt tatata tatatc tatatg tatatt tatcaa tatcac tatcag tatcat tatcca tatccc tatccg tatcct tatcga tatcgc tatcgg tatcgt tatcta tatctc tatctg tatctt tatgaa tatgac tatgag tatgat tatgca tatgcc tatgcg tatgct tatgga tatggc tatggg tatggt tatgta tatgtc tatgtg tatgtt tattaa tattac tattag tattat tattca tattcc tattcg tattct tattga tattgc tattgg tattgt tattta tatttc tatttg tatttt tcaaaa tcaaac tcaaag tcaaat tcaaca tcaacc tcaacg tcaact tcaaga tcaagc tcaagg tcaagt tcaata tcaatc tcaatg tcaatt tcacaa tcacac tcacag tcacat tcacca tcaccc tcaccg tcacct tcacga tcacgc tcacgg tcacgt tcacta tcactc tcactg tcactt tcagaa tcagac tcagag tcagat tcagca tcagcc tcagcg tcagct tcagga tcaggc tcaggg tcaggt tcagta tcagtc tcagtg tcagtt tcataa tcatac tcatag tcatat tcatca tcatcc tcatcg tcatct tcatga tcatgc tcatgg tcatgt tcatta tcattc 
tcattg tcattt tccaaa tccaac tccaag tccaat tccaca tccacc tccacg tccact tccaga tccagc tccagg tccagt tccata tccatc tccatg tccatt tcccaa tcccac tcccag tcccat tcccca tccccc tccccg tcccct tcccga tcccgc tcccgg tcccgt tcccta tccctc tccctg tccctt tccgaa tccgac tccgag tccgat tccgca tccgcc tccgcg tccgct tccgga tccggc tccggg tccggt tccgta tccgtc tccgtg tccgtt tcctaa tcctac tcctag tcctat tcctca tcctcc tcctcg tcctct tcctga tcctgc tcctgg tcctgt tcctta tccttc tccttg tccttt tcgaaa tcgaac tcgaag tcgaat tcgaca tcgacc tcgacg tcgact tcgaga tcgagc tcgagg tcgagt tcgata tcgatc tcgatg tcgatt tcgcaa tcgcac tcgcag tcgcat tcgcca tcgccc tcgccg tcgcct tcgcga tcgcgc tcgcgg tcgcgt tcgcta tcgctc tcgctg tcgctt tcggaa tcggac tcggag tcggat tcggca tcggcc tcggcg tcggct tcggga tcgggc tcgggg tcgggt tcggta tcggtc tcggtg tcggtt tcgtaa tcgtac tcgtag tcgtat tcgtca tcgtcc tcgtcg tcgtct tcgtga tcgtgc tcgtgg tcgtgt tcgtta tcgttc tcgttg tcgttt tctaaa tctaac tctaag tctaat tctaca tctacc tctacg tctact tctaga tctagc tctagg tctagt tctata tctatc tctatg tctatt tctcaa tctcac tctcag tctcat tctcca tctccc tctccg tctcct tctcga tctcgc tctcgg tctcgt tctcta tctctc tctctg tctctt tctgaa tctgac tctgag tctgat tctgca tctgcc tctgcg tctgct tctgga tctggc tctggg tctggt tctgta tctgtc tctgtg tctgtt tcttaa tcttac tcttag tcttat tcttca tcttcc tcttcg tcttct tcttga tcttgc tcttgg tcttgt tcttta tctttc tctttg tctttt tgaaaa tgaaac tgaaag tgaaat tgaaca tgaacc tgaacg tgaact tgaaga tgaagc tgaagg tgaagt tgaata tgaatc tgaatg tgaatt tgacaa tgacac tgacag tgacat tgacca tgaccc tgaccg tgacct tgacga tgacgc tgacgg tgacgt tgacta tgactc tgactg tgactt tgagaa tgagac tgagag tgagat tgagca tgagcc tgagcg tgagct tgagga tgaggc tgaggg tgaggt tgagta tgagtc tgagtg tgagtt tgataa tgatac tgatag tgatat tgatca tgatcc tgatcg tgatct tgatga tgatgc tgatgg tgatgt tgatta tgattc tgattg tgattt tgcaaa tgcaac tgcaag tgcaat tgcaca tgcacc tgcacg tgcact tgcaga tgcagc tgcagg tgcagt tgcata tgcatc tgcatg tgcatt tgccaa tgccac tgccag tgccat tgccca tgcccc tgcccg tgccct tgccga tgccgc tgccgg 
tgccgt tgccta tgcctc tgcctg tgcctt tgcgaa tgcgac tgcgag tgcgat tgcgca tgcgcc tgcgcg tgcgct tgcgga tgcggc tgcggg tgcggt tgcgta tgcgtc tgcgtg tgcgtt tgctaa tgctac tgctag tgctat tgctca tgctcc tgctcg tgctct tgctga tgctgc tgctgg tgctgt tgctta tgcttc tgcttg tgcttt tggaaa tggaac tggaag tggaat tggaca tggacc tggacg tggact tggaga tggagc tggagg tggagt tggata tggatc tggatg tggatt tggcaa tggcac tggcag tggcat tggcca tggccc tggccg tggcct tggcga tggcgc tggcgg tggcgt tggcta tggctc tggctg tggctt tgggaa tgggac tgggag tgggat tgggca tgggcc tgggcg tgggct tgggga tggggc tggggg tggggt tgggta tgggtc tgggtg tgggtt tggtaa tggtac tggtag tggtat tggtca tggtcc tggtcg tggtct tggtga tggtgc tggtgg tggtgt tggtta tggttc tggttg tggttt tgtaaa tgtaac tgtaag tgtaat tgtaca tgtacc tgtacg tgtact tgtaga tgtagc tgtagg tgtagt tgtata tgtatc tgtatg tgtatt tgtcaa tgtcac tgtcag tgtcat tgtcca tgtccc tgtccg tgtcct tgtcga tgtcgc tgtcgg tgtcgt tgtcta tgtctc tgtctg tgtctt tgtgaa tgtgac tgtgag tgtgat tgtgca tgtgcc tgtgcg tgtgct tgtgga tgtggc tgtggg tgtggt tgtgta tgtgtc tgtgtg tgtgtt tgttaa tgttac tgttag tgttat tgttca tgttcc tgttcg tgttct tgttga tgttgc tgttgg tgttgt tgttta tgtttc tgtttg tgtttt ttaaaa ttaaac ttaaag ttaaat ttaaca ttaacc ttaacg ttaact ttaaga ttaagc ttaagg ttaagt ttaata ttaatc ttaatg ttaatt ttacaa ttacac ttacag ttacat ttacca ttaccc ttaccg ttacct ttacga ttacgc ttacgg ttacgt ttacta ttactc ttactg ttactt ttagaa ttagac ttagag ttagat ttagca ttagcc ttagcg ttagct ttagga ttaggc ttaggg ttaggt ttagta ttagtc ttagtg ttagtt ttataa ttatac ttatag ttatat ttatca ttatcc ttatcg ttatct ttatga ttatgc ttatgg ttatgt ttatta ttattc ttattg ttattt ttcaaa ttcaac ttcaag ttcaat ttcaca ttcacc ttcacg ttcact ttcaga ttcagc ttcagg ttcagt ttcata ttcatc ttcatg ttcatt ttccaa ttccac ttccag ttccat ttccca ttcccc ttcccg ttccct ttccga ttccgc ttccgg ttccgt ttccta ttcctc ttcctg ttcctt ttcgaa ttcgac ttcgag ttcgat ttcgca ttcgcc ttcgcg ttcgct ttcgga ttcggc ttcggg ttcggt ttcgta ttcgtc ttcgtg ttcgtt ttctaa ttctac ttctag ttctat ttctca ttctcc ttctcg ttctct 
ttctga ttctgc ttctgg ttctgt ttctta ttcttc ttcttg ttcttt ttgaaa ttgaac ttgaag ttgaat ttgaca ttgacc ttgacg ttgact ttgaga ttgagc ttgagg ttgagt ttgata ttgatc ttgatg ttgatt ttgcaa ttgcac ttgcag ttgcat ttgcca ttgccc ttgccg ttgcct ttgcga ttgcgc ttgcgg ttgcgt ttgcta ttgctc ttgctg ttgctt ttggaa ttggac ttggag ttggat ttggca ttggcc ttggcg ttggct ttggga ttgggc ttgggg ttgggt ttggta ttggtc ttggtg ttggtt ttgtaa ttgtac ttgtag ttgtat ttgtca ttgtcc ttgtcg ttgtct ttgtga ttgtgc ttgtgg ttgtgt ttgtta ttgttc ttgttg ttgttt tttaaa tttaac tttaag tttaat tttaca tttacc tttacg tttact tttaga tttagc tttagg tttagt tttata tttatc tttatg tttatt tttcaa tttcac tttcag tttcat tttcca tttccc tttccg tttcct tttcga tttcgc tttcgg tttcgt tttcta tttctc tttctg tttctt tttgaa tttgac tttgag tttgat tttgca tttgcc tttgcg tttgct tttgga tttggc tttggg tttggt tttgta tttgtc tttgtg tttgtt ttttaa ttttac ttttag ttttat ttttca ttttcc ttttcg ttttct ttttga ttttgc ttttgg ttttgt ttttta tttttc tttttg tttttt +custom hg38 9606 Homo sapiens genomic 1 4 855 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 2 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 2 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 1 3 2 0 0 0 0 0 0 2 0 0 0 1 1 0 0 0 2 1 0 1 0 0 0 0 0 0 0 1 0 0 1 2 1 1 0 2 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 2 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 2 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 2 0 0 0 0 0 1 0 0 0 0 0 
0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 3 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 2 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 2 0 1 0 0 0 1 0 0 0 1 0 1 1 2 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 2 1 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 2 2 0 0 0 1 0 0 0 2 0 1 0 0 0 0 1 0 1 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 2 0 0 0 0 1 0 2 2 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 3 0 0 1 0 0 0 0 1 0 1 1 0 0 1 0 1 2 2 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 1 2 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 3 1 1 0 1 1 3 1 0 0 1 1 2 1 0 0 1 0 1 0 0 1 0 1 2 0 1 0 0 0 0 0 0 0 0 0 0 0 3 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 2 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 2 0 0 3 0 0 1 0 0 0 1 2 1 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 
0 0 0 1 0 0 1 1 1 0 0 2 0 2 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 2 0 0 1 0 0 0 1 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 2 0 1 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 0 0 0 1 0 0 0 0 0 0 3 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 1 0 1 0 1 0 0 0 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 2 0 1 0 2 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 2 0 0 2 0 1 0 3 1 0 0 1 1 1 2 1 1 0 1 0 0 0 1 0 0 2 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 2 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 2 2 0 0 1 0 0 0 1 0 0 0 0 0 0 0 2 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 1 0 3 1 0 0 1 0 0 0 1 0 1 2 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 1 0 1 0 1 1 1 0 1 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 2 0 0 0 0 0 1 2 0 3 2 0 2 0 1 1 1 0 0 0 3 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 2 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 2 1 1 0 1 1 0 1 0 3 2 2 1 1 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0 3 0 1 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 
0 2 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 2 0 1 0 0 1 2 0 7 0 3 2 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1 2 1 2 0 1 3 1 0 1 3 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 2 0 1 0 2 0 0 0 0 0 0 1 0 0 0 0 1 2 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 2 2 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 2 0 0 1 0 0 0 1 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 1 0 1 0 1 1 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 2 0 2 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 2 0 1 2 1 1 1 1 0 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 2 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 2 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 2 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 2 1 1 0 0 2 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cds_fasta_out.fasta Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,8 @@ +>ENST00000432222.1 +GGCCAGGTCCCAGGAACCCATATTTAGAACTGAGGGTCCTATGGCCCAGAACCAGGCATCTGTACTTAAGAACCAAGCACCTGTGACCAGGACCCAGGCACCCATCACTGGAACCCTCTGTCAGGATGCCAGATCCAACTCTCATCCAGTGAAGCCCTCAAGACTCAATGTCTTCTGTTGCCCCCATTGTTCTTTGACTTTTAGCAAGAAATCCTATCTCTCCAGACACCAGAAGGCCCACCTCACAGAGCCGCCCAACTACTGCTTCCATTGCAGCAAGTCTTTCAGCTCATTTTCCAGGCTGGTCAGACACCAGCAGACCCACTGGAAGCAGAAGAGCTACCTTTGCCCTATCTGTGACCTCTCCTTTGGGGAGAAAGAGGGCCTTATGGATCACTGGAGGGGCTATAAAGGCAAGGACCTGTGCCAGAGCAGCCACCATAAATGCCGGGTGATCCTGGGCCAGTGGCTTGGCTTCTCTCATGATGTCCCCACTATGGCTGGGGAGGAATGGAAGCATGGAGGTGATCAATCTCCCCCCAGGATCCATACCCCCAGGAGAAGAGGCCTAAGAGAGAAGGCCTGCAAAGGAGACAAAACAAAGGAGGCAGTGAGCATCTTGAAACATAAATA +>ENST00000448800.5 +TGGCCACACTGCAACAGCTGGAGCCAGTTACTGGGCAGGTGTCTGTACTGGTGATGTGTCACACTCGGGAGTTGGCTTTTCAGATCAGCAAGGAATATGAGCGCTTCTCTAAATACATGCCCAATGTCAAGGTTGCTGTTTTTTTTGGTGGTCTGTCTATCAAGAAGGATGAAGAGGTGCTGAAGAAGAACTGCCCGCATATCGTCGTGGGGACTCCAGGCCGTATCCTAGCCCTGGCTCGAAATAAGAGCCTCAACCTCAAACACATTAAACACTTTATTTTGGATGAATGTGATAAGATGCTTGAACAGCTCGACATGCGTCGGGATGTCCAGGAAATTTTTCGCATGACCCCCCACGAGAAGCAGGTCATGATGTTCAGTGCTACCTTGAGCAAAGAGATCCGTCCAGTCTGCCGCAAGTTCATGCAAGATCCAATGGAGATCTTCGTGGATGATGAGACGAAGTTGACGCTGCATGGGTTGCAGCAGTACTACGTGAAACTGAAGGACAACGAGAAGAACCGGAAGCTCTTTGACCTTCTGGATGTCCTTGAGTTCAACCAGGTGGTGATCTTTGTGAAGTCTGTGCAGCGGTGCATTGCCTTGGCCCAGCTACTAGTGGAGCAGAACTTCCCAGCCATTGCCATCCACCGTGGGATGCCCCAGGAGGAGAGGCTTTCTCGGTATCAGCAGTTTAAAGATTTTCAACGACGAATTCTTGTGGCTACCAACCTATTTGGCCGAGGCATGGACATCGAGCGGGTGAACATTGCTTTTAATTATGACATGCCTGAGGATTCTGACACCTACCTGCATCGGGTGGCCAGAGCAGGCCGGTTTGGCACCAAGGGCTTGGCTATCACATTTGTGTCCGATGAGAATGATGCCAAGATCCTCAATGATGTGCAGGATCGCTTTGAGGTCAATATTAGTGAGCTGCCTGATGAGATAGACATCTCCTCCTACATTGAACAGACACGGT +>ENST00000445122.5 
+ATGCCGGGCACCCAGACTCCAGCACCGGCCGAGGACCCCCACTCCGGCTGCAGGGACCCTGTCCCAGCGAGACCGCAGGCATGTCATCCGAAAAGTCAGGAGACTCGCTTCGAGGGCCCACTTcccccgccgccgcccgctgccgccgccccgcccccgccggcgccagccCAGACTGCCCAGGCCCCTGGCTTCGTGGTGCCCACGCACGCGGGGACTGTGGGCACGCTGCCGCTGGGGGGCTACGTAGCGCCCGGATACCCCCTGCAGCTGCAGCCTTGCACTGCTTACGTGCCGGTCTACCCGGTGGGCACGCCATATGCAGGCGGGACCCCGGGGGGAACAGGAGTGACCTCCACTCTCCCCCCGCCGCCCCAGGGCCCAGGGCTGGCCCTACTGGAGCCGAGGCGCCCGCCACACGACTACATGCCCATCGCGGTGCTGACCACCATCTGTTGCTTCTGGCCTACTGGCATCATTGCCATCTTCAAGGCCGTGCAGGTGCGCACGGCCTTGGCCCGCGGAGACATGGTGTCGGCCGAGATCGCTTCACGCGAGGCCCGGAACTTCTCCTTCATCTCCCTGGCCGTGGGCATCGCGGCCATGGTGCTCTGTACCATCCTCACCGTAGTCATCATCATCGCCGCGCAGCACCACGAGAACTACTGGGATCCCTAA +>ENST00000430777.2 +ATGGATTTGTGGCCAGGGGCATGGAtgctgctgctgctgctcttcctgctgctgctcttcctgctgcCCACCCTGTGGTTCTGCAGCCCCAGTGCCAAGTACTTCTTCAAGATGGCCTTCTACAATGGCTGGATCCTCTTCCTGGCTGTGCTCGCCATCCCTGTGTGTGCCGTGCGAGGACGCAACGTCGAGAACATGAAGATCTTGCGTCTAATGCTGCTCCACATCAAATACCTGTACGGGATCCGAGTGGAGGTGCGAGGGGCTCACCACTTCCCTCCCTCGCAGCCCTATGTTGTTGTCTCCAACCACCAGAGCTCTCTCGATCTGCTTGGGATGATGGAGGTACTGCCAGGCCGCTGTGTGCCCATTGCCAAGCGCGAGCTACTGTGggctggctctgccgggctggcctgctggctggcaggAGTCATCTTCATCGACCGGAAGCGCACGGGGGATGCCATCAGTGTCATGTCTGAGGTCGCCCAGACCCTGCTCACCCAGGACGTGAGGGTCTGGGTGTTTCCTGAGGGAACGAGAAACCACAATGGCTCCATGCTGCCCTTCAAACGTGGCGCCTTCCATCTTGCAGTGCAGGCCCAGGTTCCCATTGTCCCCATAGTCATGTCCTCCTACCAAGACTTCTACTGCAAGAAGGAGCGTCGCTTCACCTCGGGACAATGTCAGGTGCGGGTGCTGCCCCCAGTGCCCACGGAAGGGCTGACACCAGATGACGTCCCAGCTCTGGCTGACAGAGTCCGGCACTCCATGCTCACTGTTTTCCGGGAAATCTCCACTGATGGCCGGGGTGGTGGTGACTATCTGAAGAAGCCTGGGGGCGGTGGGTGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/codon_freq_from_bicodon.tabular Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,5 @@ +#frame codon_count aaa aac aag aat aca acc acg act aga agc agg agt ata atc atg att caa cac cag cat cca ccc ccg cct cga cgc cgg cgt cta ctc ctg ctt gaa gac gag gat gca gcc gcg gct gga ggc ggg ggt gta gtc gtg gtt taa tac tag tat tca tcc tcg tct tga tgc tgg tgt tta ttc ttg ttt +2-counts 855 4 12 14 8 13 23 8 15 28 18 25 12 8 27 31 7 3 11 17 4 23 28 16 14 4 12 2 1 10 16 35 7 4 7 16 4 16 38 6 14 15 15 18 4 4 11 25 4 1 12 3 3 18 15 7 16 16 17 26 10 8 20 19 7 +2-percent 855 0.4678362573099415500088582576 1.4035087719298244834931210789 1.6374269005847954527865795171 0.9356725146198831000177165151 1.5204678362573098571175478355 2.6900584795321638154064203263 0.9356725146198831000177165151 1.7543859649122806043664013487 3.2748538011695909055731590342 2.1052631578947367252396816184 2.9239766081871341185660639894 1.4035087719298244834931210789 0.9356725146198831000177165151 3.1578947368421053099041273526 3.6257309941520468044018343790 0.8187134502923977263932897586 0.3508771929824561208732802697 1.2865497076023393319132992474 1.9883040935672515736598597869 0.4678362573099415500088582576 2.6900584795321638154064203263 3.2748538011695909055731590342 1.8713450292397662000354330303 1.6374269005847954527865795171 0.4678362573099415500088582576 1.4035087719298244834931210789 0.2339181286549707750044291288 0.1169590643274853875022145644 1.1695906432748537362442675658 1.8713450292397662000354330303 4.0935672514619882988995414053 0.8187134502923977263932897586 0.4678362573099415500088582576 0.8187134502923977263932897586 1.8713450292397662000354330303 0.4678362573099415500088582576 1.8713450292397662000354330303 4.4444444444444446418174266000 0.7017543859649122417465605395 1.6374269005847954527865795171 1.7543859649122806043664013487 1.7543859649122806043664013487 2.1052631578947367252396816184 0.4678362573099415500088582576 
0.4678362573099415500088582576 1.2865497076023393319132992474 2.9239766081871341185660639894 0.4678362573099415500088582576 0.1169590643274853875022145644 1.4035087719298244834931210789 0.3508771929824561208732802697 0.3508771929824561208732802697 2.1052631578947367252396816184 1.7543859649122806043664013487 0.8187134502923977263932897586 1.8713450292397662000354330303 1.8713450292397662000354330303 1.9883040935672515736598597869 3.0409356725146197142350956710 1.1695906432748537362442675658 0.9356725146198831000177165151 2.3391812865497074724885351316 2.2222222222222223209087133000 0.8187134502923977263932897586 +3-counts 855 3 2 3 4 12 10 10 10 17 12 16 5 2 7 7 3 23 10 19 18 37 23 19 25 7 16 10 4 8 14 19 18 27 10 14 12 23 12 8 19 31 18 11 11 3 7 8 9 11 6 4 9 22 15 17 20 27 36 28 19 1 5 9 10 +3-percent 855 0.3508771929824561208732802697 0.2339181286549707750044291288 0.3508771929824561208732802697 0.4678362573099415500088582576 1.4035087719298244834931210789 1.1695906432748537362442675658 1.1695906432748537362442675658 1.1695906432748537362442675658 1.9883040935672515736598597869 1.4035087719298244834931210789 1.8713450292397662000354330303 0.5847953216374268681221337829 0.2339181286549707750044291288 0.8187134502923977263932897586 0.8187134502923977263932897586 0.3508771929824561208732802697 2.6900584795321638154064203263 1.1695906432748537362442675658 2.2222222222222223209087133000 2.1052631578947367252396816184 4.3274853801169594902376047685 2.6900584795321638154064203263 2.2222222222222223209087133000 2.9239766081871341185660639894 0.8187134502923977263932897586 1.8713450292397662000354330303 1.1695906432748537362442675658 0.4678362573099415500088582576 0.9356725146198831000177165151 1.6374269005847954527865795171 2.2222222222222223209087133000 2.1052631578947367252396816184 3.1578947368421053099041273526 1.1695906432748537362442675658 1.6374269005847954527865795171 1.4035087719298244834931210789 2.6900584795321638154064203263 1.4035087719298244834931210789 
0.9356725146198831000177165151 2.2222222222222223209087133000 3.6257309941520468044018343790 2.1052631578947367252396816184 1.2865497076023393319132992474 1.2865497076023393319132992474 0.3508771929824561208732802697 0.8187134502923977263932897586 0.9356725146198831000177165151 1.0526315789473683626198408092 1.2865497076023393319132992474 0.7017543859649122417465605395 0.4678362573099415500088582576 1.0526315789473683626198408092 2.5730994152046786638265984948 1.7543859649122806043664013487 1.9883040935672515736598597869 2.3391812865497074724885351316 3.1578947368421053099041273526 4.2105263157894734504793632368 3.2748538011695909055731590342 2.2222222222222223209087133000 0.1169590643274853875022145644 0.5847953216374268681221337829 1.0526315789473683626198408092 1.1695906432748537362442675658
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/codon_freq_from_bicodon_log.txt Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,7 @@ +Using 4096 codon pairs +Reporting 64 codons +Found a total of 100.000000 percent for 2. +Found a total of 100.000000 percent for 3. +Reporting 21 amino acids +Found a total of 100.000000 percent for 2-aa. +Found a total of 100.000000 percent for 3-aa.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/codon_out.tabular Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,2 @@ +Division Assembly Taxid Species Organelle Translation Table # CDS # Codons aaa aac aag aat aca acc acg act aga agc agg agt ata atc atg att caa cac cag cat cca ccc ccg cct cga cgc cgg cgt cta ctc ctg ctt gaa gac gag gat gca gcc gcg gct gga ggc ggg ggt gta gtc gtg gtt taa tac tag tat tca tcc tcg tct tga tgc tgg tgt tta ttc ttg ttt +custom hg38 9606 Homo sapiens genomic 1 4 859 14 13 29 8 5 10 4 9 9 12 13 6 4 17 17 9 12 21 28 33 13 26 7 14 13 14 15 12 12 17 20 25 13 16 26 27 12 33 7 30 9 28 16 12 3 12 21 3 0 9 0 5 5 12 1 15 0 20 9 14 1 11 11 17
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_naltorfs_log.txt Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,7 @@ +{'chr6_GL000250v2_alt': 4672374} +Excluding 77 unique canonical coding sequences from 77 BED lines. +cds length is not divisible by 3!!!: 634. chr6_GL000250v2_alt 938190 938841 ENST00000432222.1 0 - 938207 938841 12,12,120 1 651 0 +cds length is not divisible by 3!!!: 271. chr6_GL000250v2_alt 2057211 2060006 ENST00000420604.1 0 - 2057618 2060006 12,12,120 3 437,165,76 0,2313,2719 +cds length is not divisible by 3!!!: 986. chr6_GL000250v2_alt 2862767 2871733 ENST00000448800.5 0 - 2862982 2871733 12,12,120 9 232,148,145,110,132,119,184,93,38 0,560,834,1077,2561,5148,6281,8544,8928 +cds length is not divisible by 3!!!: 580. chr6_GL000250v2_alt 3655068 3704125 ENST00000425722.1 0 - 3655068 3703952 12,12,120 19 21,21,24,21,21,21,21,21,21,39,90,21,21,21,30,33,33,87,186 0,637,1109,8114,9561,12963,13438,14141,16499,17121,26913,32289,32814,43342,43448,45110,45965,47083,48871 +transcripts: 77 unique LNO ORFs: 4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gencode_canonical.hg38.chr6_GL0002508.bed Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,77 @@ +chr6_GL000250v2_alt 168585 189574 ENST00000437160.6 0 - 169653 189217 12,12,120 8 1664,27,33,116,23,231,96,777, 0,4234,5806,5971,8695,17011,18891,20212, +chr6_GL000250v2_alt 260408 270885 ENST00000426120.2 0 - 260624 269578 12,12,120 7 1802,105,127,92,82,269,253, 0,3949,4702,5171,8728,9161,10224, +chr6_GL000250v2_alt 309837 310865 ENST00000421027.1 0 - 309837 310800 12,12,120 1 1028, 0, +chr6_GL000250v2_alt 351929 352936 ENST00000420178.3 0 - 351929 352871 12,12,120 1 1007, 0, +chr6_GL000250v2_alt 377510 378446 ENST00000428081.1 0 + 377510 378446 12,12,120 1 936, 0, +chr6_GL000250v2_alt 438928 440200 ENST00000440124.4 0 + 439261 440200 12,12,120 1 1272, 0, +chr6_GL000250v2_alt 572355 573321 ENST00000420495.4 0 + 572355 573321 12,12,120 1 966, 0, +chr6_GL000250v2_alt 691168 722731 ENST00000435232.5 0 - 692358 693306 12,12,120 5 2229,48,125,124,85, 0,3408,4796,6359,31478, +chr6_GL000250v2_alt 704969 706618 ENST00000455234.1 0 + 705679 706618 12,12,120 1 1649, 0, +chr6_GL000250v2_alt 722840 729989 ENST00000453467.5 0 + 727430 728381 12,12,120 4 27,116,51,2830, 0,1273,3224,4319, +chr6_GL000250v2_alt 752426 753563 ENST00000418003.3 0 - 752426 753563 12,12,120 1 1137, 0, +chr6_GL000250v2_alt 821306 825621 ENST00000414436.2 0 - 821574 825397 12,12,120 2 739,251, 0,4064, +chr6_GL000250v2_alt 821306 893756 ENST00000449163.5 0 - 869267 893509 12,12,120 19 739,176,144,129,128,94,108,117,133,151,78,64,243,192,66,102,171,135,553, 0,47959,48912,49278,49987,50809,51316,51521,53020,53648,55343,56970,57662,65507,66158,66510,67711,68296,71897, +chr6_GL000250v2_alt 853147 854731 ENST00000453513.4 0 + 853637 854576 12,12,120 1 1584, 0, +chr6_GL000250v2_alt 922689 938078 ENST00000548017.1 0 + 922918 936889 12,12,120 7 317,348,114,21,21,21,1203, 0,2336,9166,10657,10918,13715,14186, +chr6_GL000250v2_alt 938190 938841 ENST00000432222.1 0 - 
938207 938841 12,12,120 1 651, 0, +chr6_GL000250v2_alt 1093432 1096751 ENST00000615098.4 0 + 1093456 1095994 12,12,120 7 97,270,276,276,117,33,372, 0,226,722,1597,1995,2557,2947, +chr6_GL000250v2_alt 1147347 1267669 ENST00000640829.1 0 + 1147490 1266858 12,12,120 11 216,16,14,240,276,276,33,84,93,35,281, 0,340,39005,53186,53667,54522,68203,118979,119506,119955,120041, +chr6_GL000250v2_alt 1319351 1323001 ENST00000443494.6 0 + 1319622 1322789 12,12,120 4 416,101,110,237, 0,663,970,3413, +chr6_GL000250v2_alt 1324800 1328425 ENST00000441553.5 0 + 1326768 1327398 12,12,120 3 301,109,1230, 0,1881,2395, +chr6_GL000250v2_alt 1328357 1333979 ENST00000450015.2 0 - 1329202 1333881 12,12,120 4 1426,92,23,665, 0,2891,3190,4957, +chr6_GL000250v2_alt 1368538 1371196 ENST00000422484.1 0 - 1368538 1370895 12,12,120 4 231,96,500,43, 0,1199,1940,2615, +chr6_GL000250v2_alt 1394223 1406831 ENST00000433713.5 0 + 1395132 1405908 12,12,120 6 62,649,96,225,23,1011, 0,605,9866,10857,11392,11597, +chr6_GL000250v2_alt 1410040 1419029 ENST00000449208.6 0 - 1412064 1418953 12,12,120 7 2542,33,116,23,231,96,505, 0,3782,4994,5424,6453,7204,8484, +chr6_GL000250v2_alt 1421285 1430769 ENST00000552062.2 0 + 1421971 1430429 12,12,120 8 860,96,231,9,14,116,33,858, 0,3970,5094,6035,6633,7295,7770,8626, +chr6_GL000250v2_alt 1442505 1471482 ENST00000425523.5 0 - 1443926 1457164 12,12,120 9 2104,33,116,23,231,96,488,104,123, 0,4719,4970,5565,12071,13984,14221,16590,28854, +chr6_GL000250v2_alt 1656333 1673225 ENST00000458607.5 0 + 1658808 1671755 12,12,120 8 152,153,460,96,231,23,116,2018, 0,2046,2468,3938,8907,13070,13667,14874, +chr6_GL000250v2_alt 1658801 1676354 ENST00000550581.3 0 + 1658808 1676306 12,12,120 10 460,96,231,23,116,185,101,83,126,146, 0,1470,6439,10602,11199,12406,15992,16185,17126,17407, +chr6_GL000250v2_alt 1870880 1883062 ENST00000383450.3 0 - 1875575 1883062 12,12,120 7 4937,141,163,179,195,96,208, 0,5319,5735,5975,11091,11708,11974, +chr6_GL000250v2_alt 1988977 2002543 
ENST00000450902.6 0 - 1988977 2002347 12,12,120 12 153,99,211,116,111,192,204,255,57,163,239,403, 0,264,560,3155,3439,5329,5599,6007,10938,11318,11564,13163, +chr6_GL000250v2_alt 2005895 2017402 ENST00000438815.2 0 - 2006775 2015525 12,12,120 3 900,211,3488, 0,2790,8019, +chr6_GL000250v2_alt 2017553 2020927 ENST00000427944.5 0 - 2018167 2020481 12,12,120 5 896,177,197,440,45, 0,1229,2000,2795,3329, +chr6_GL000250v2_alt 2029311 2047393 ENST00000427406.6 0 - 2029969 2044679 12,12,120 15 826,111,126,183,120,2478,71,792,93,60,1481,70,381,139,645, 0,2746,2945,3297,3611,3814,7576,7758,11604,11859,12066,13840,13995,15232,17437, +chr6_GL000250v2_alt 2049703 2054929 ENST00000421473.6 0 + 2050009 2053900 12,12,120 4 363,109,111,2087, 0,2336,2717,3139, +chr6_GL000250v2_alt 2057211 2060006 ENST00000420604.1 0 - 2057618 2060006 12,12,120 3 437,165,76, 0,2313,2719, +chr6_GL000250v2_alt 2217891 2218933 ENST00000434428.5 0 + 2218232 2218933 12,12,120 3 426,103,229, 0,519,813, +chr6_GL000250v2_alt 2313217 2319398 ENST00000430305.2 0 + 2313458 2317689 12,12,120 3 302,1445,1904, 0,2514,4277, +chr6_GL000250v2_alt 2827448 2843673 ENST00000619506.1 0 + 2838192 2842459 12,12,120 6 115,255,288,279,132,1342, 0,10718,11244,12123,12501,14883, +chr6_GL000250v2_alt 2861265 2862781 ENST00000424816.2 0 + 2861563 2862484 12,12,120 2 469,486, 0,1030, +chr6_GL000250v2_alt 2862767 2871733 ENST00000448800.5 0 - 2862982 2871733 12,12,120 9 232,148,145,110,132,119,184,93,38, 0,560,834,1077,2561,5148,6281,8544,8928, +chr6_GL000250v2_alt 2909112 2910915 ENST00000445232.2 0 + 2909112 2910116 12,12,120 3 46,48,1221, 0,233,582, +chr6_GL000250v2_alt 2913133 2915004 ENST00000457552.2 0 - 2913287 2914996 12,12,120 4 609,72,46,170, 0,1004,1259,1701, +chr6_GL000250v2_alt 2918702 2921488 ENST00000433676.6 0 + 2919878 2921255 12,12,120 4 192,119,23,392, 0,1076,1820,2394, +chr6_GL000250v2_alt 2921473 2925564 ENST00000432392.6 0 - 2921772 2925300 12,12,120 4 337,108,345,307, 0,631,887,3784, +chr6_GL000250v2_alt 
3089203 3095288 ENST00000430216.5 0 + 3090740 3095143 12,12,120 14 325,872,73,110,81,88,190,127,150,75,144,138,74,257, 0,830,1956,2174,2505,2833,3194,3498,4098,4596,4880,5226,5520,5828, +chr6_GL000250v2_alt 3095410 3097463 ENST00000436320.5 0 + 3095666 3097143 12,12,120 4 370,141,96,413, 0,616,1265,1640, +chr6_GL000250v2_alt 3098201 3109905 ENST00000452329.2 0 - 3098205 3109390 12,12,120 17 181,175,140,302,93,153,133,186,117,113,170,196,111,97,279,249,186, 0,293,655,873,1568,1772,2032,3427,4084,4410,7363,7651,8925,9742,10373,10955,11518, +chr6_GL000250v2_alt 3130006 3139596 ENST00000432122.2 0 - 3130367 3139369 12,12,120 5 487,60,31,68,230, 0,594,1081,8680,9360, +chr6_GL000250v2_alt 3142240 3147687 ENST00000426095.2 0 - 3142668 3144594 12,12,120 2 2367,177, 0,5270, +chr6_GL000250v2_alt 3148164 3150568 ENST00000422919.1 0 + 3148378 3150304 12,12,120 2 289,1620, 0,784, +chr6_GL000250v2_alt 3160359 3162874 ENST00000450744.1 0 + 3160575 3162501 12,12,120 1 2515, 0, +chr6_GL000250v2_alt 3167532 3172376 ENST00000430690.5 0 + 3169944 3172278 12,12,120 5 283,41,219,200,219, 0,492,1382,2319,4625, +chr6_GL000250v2_alt 3191667 3195522 ENST00000423382.2 0 - 3192334 3195392 12,12,120 6 894,223,183,263,193,289, 0,990,1387,2136,2947,3566, +chr6_GL000250v2_alt 3195810 3211661 ENST00000550401.2 0 - 3196242 3211595 12,12,120 20 554,85,95,71,74,104,95,254,103,93,100,236,84,88,61,100,79,74,49,106, 0,1457,1621,1815,2120,2303,2495,2678,5960,6350,7406,7615,8117,8277,11264,11733,12656,12860,13838,15745, +chr6_GL000250v2_alt 3212373 3230306 ENST00000421926.6 0 - 3212698 3230299 12,12,120 28 506,176,79,87,116,78,145,168,155,105,102,123,107,83,183,236,144,149,102,115,135,156,41,85,254,219,67,49, 0,914,1255,2940,3135,3603,4047,4633,4886,5153,6822,7016,7236,7818,8020,8282,8604,8840,9210,9469,9709,12652,12939,13071,16511,16854,17171,17884, +chr6_GL000250v2_alt 3406267 3418718 ENST00000433037.1 0 - 3406267 3418718 12,12,120 6 306,333,306,291,297,369, 0,5366,7700,8368,10745,12082, 
+chr6_GL000250v2_alt 3480956 3485682 ENST00000445122.5 0 - 3481819 3484701 12,12,120 6 1040,186,216,91,81,112, 0,1178,2009,3450,3737,4614, +chr6_GL000250v2_alt 3497183 3500883 ENST00000414111.6 0 + 3498767 3500558 12,12,120 9 75,129,123,110,96,171,80,154,372, 0,1556,1916,2119,2328,2505,2768,2921,3328, +chr6_GL000250v2_alt 3500813 3510698 ENST00000430777.2 0 - 3501877 3504098 12,12,120 7 1237,73,96,176,134,209,96, 0,1754,1953,2213,2725,3085,9789, +chr6_GL000250v2_alt 3510955 3513395 ENST00000449794.2 0 + 3511113 3512928 12,12,120 6 298,19,113,55,118,565, 0,1187,1297,1523,1655,1875, +chr6_GL000250v2_alt 3513569 3516924 ENST00000549839.5 0 - 3513744 3516824 12,12,120 5 272,83,123,107,152, 0,383,2660,2913,3203, +chr6_GL000250v2_alt 3517334 3522785 ENST00000453487.2 0 - 3518980 3522514 12,12,120 9 1739,87,89,154,136,191,248,74,492, 0,1868,2077,2506,2911,3230,3521,3987,4959, +chr6_GL000250v2_alt 3523364 3528122 ENST00000457070.1 0 - 3523964 3525112 12,12,120 8 738,200,103,59,84,81,93,98, 0,943,1382,1706,2275,3544,3971,4660, +chr6_GL000250v2_alt 3527441 3536999 ENST00000425600.1 0 - 3528035 3536999 12,12,120 12 1308,98,148,296,139,82,220,176,384,524,113,253, 0,1481,2082,2456,3579,3807,4084,6000,6286,7245,8938,9305, +chr6_GL000250v2_alt 3655068 3704125 ENST00000425722.1 0 - 3655068 3703952 12,12,120 19 21,21,24,21,21,21,21,21,21,39,90,21,21,21,30,33,33,87,186, 0,637,1109,8114,9561,12963,13438,14141,16499,17121,26913,32289,32814,43342,43448,45110,45965,47083,48871, +chr6_GL000250v2_alt 3726193 3739359 ENST00000445928.5 0 - 3726199 3739352 12,12,120 7 95,282,348,21,282,348,86, 0,781,2076,7820,8970,10974,13080, +chr6_GL000250v2_alt 3823888 3837634 ENST00000426847.2 0 - 3824827 3837535 12,12,120 6 953,24,111,282,270,199, 0,1752,2249,3043,5625,13547, +chr6_GL000250v2_alt 4047519 4053351 ENST00000449560.5 0 + 4047581 4052533 12,12,120 4 144,249,282,973, 0,3778,4410,4859, +chr6_GL000250v2_alt 4062236 4069674 ENST00000419685.6 0 - 4062591 4069610 12,12,120 6 
369,24,111,282,267,161, 0,1184,1676,2753,5563,7277, +chr6_GL000250v2_alt 4117786 4122072 ENST00000426644.6 0 - 4118239 4121975 12,12,120 6 489,32,111,282,270,188, 0,677,960,1557,2281,4098, +chr6_GL000250v2_alt 4126846 4143590 ENST00000439425.6 0 - 4127302 4143099 12,12,120 12 486,137,160,174,189,129,198,206,131,115,497,75, 0,7419,7949,8286,8637,10352,10646,13173,13662,15556,15760,16669, +chr6_GL000250v2_alt 4145581 4149568 ENST00000436627.2 0 - 4145823 4149278 12,12,120 6 331,205,130,112,148,425, 0,814,1417,1955,2225,3562, +chr6_GL000250v2_alt 4150073 4158843 ENST00000418205.2 0 - 4150443 4158681 12,12,120 11 577,137,163,174,189,129,198,206,131,115,940, 0,1859,2304,2710,3443,3781,5109,5733,6900,7179,7830, +chr6_GL000250v2_alt 4159070 4184429 ENST00000425855.2 0 + 4159094 4184317 12,12,120 6 84,68,130,142,93,156, 0,1932,3799,4158,22841,25203, +chr6_GL000250v2_alt 4239473 4245915 ENST00000395312.7 0 - 4239815 4245652 12,12,120 6 359,36,117,285,282,318, 0,713,907,2543,4055,6124, +chr6_GL000250v2_alt 4369248 4385378 ENST00000443117.5 0 - 4373329 4378232 12,12,120 6 779,167,282,246,199,31, 0,4069,4450,5072,8884,16099, +chr6_GL000250v2_alt 4380587 4391853 ENST00000411749.6 0 + 4380703 4390890 12,12,120 6 216,264,282,111,24,663, 0,4752,9014,9843,10283,10603,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/naltorfs_fasta_out.fasta Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,8 @@ +>ENST00000432222.1|chromosome:hg38:chr6_GL000250v2_alt:938210:938801:- +ATGGCCCAGAACCAGGCATCTGTACTTAAGAACCAAGCACCTGTGACCAGGACCCAGGCACCCATCACTGGAACCCTCTGTCAGGATGCCAGATCCAACTCTCATCCAGTGAAGCCCTCAAGACTCAATGTCTTCTGTTGCCCCCATTGTTCTTTGACTTTTAGCAAGAAATCCTATCTCTCCAGACACCAGAAGGCCCACCTCACAGAGCCGCCCAACTACTGCTTCCATTGCAGCAAGTCTTTCAGCTCATTTTCCAGGCTGGTCAGACACCAGCAGACCCACTGGAAGCAGAAGAGCTACCTTTGCCCTATCTGTGACCTCTCCTTTGGGGAGAAAGAGGGCCTTATGGATCACTGGAGGGGCTATAAAGGCAAGGACCTGTGCCAGAGCAGCCACCATAAATGCCGGGTGATCCTGGGCCAGTGGCTTGGCTTCTCTCATGATGTCCCCACTATGGCTGGGGAGGAATGGAAGCATGGAGGTGATCAATCTCCCCCCAGGATCCATACCCCCAGGAGAAGAGGCCTAAGAGAGAAGGCCTGCAAAGGAGACAAAACAAAGGAGGCAGTGAGCATCTTGAAACATAAA +>ENST00000448800.5|chromosome:hg38:chr6_GL000250v2_alt:2862985:2871389:- +ATGTGTCACACTCGGGAGTTGGCTTTTCAGATCAGCAAGGAATATGAGCGCTTCTCTAAATACATGCCCAATGTCAAGGTTGCTGTTTTTTTTGGTGGTCTGTCTATCAAGAAGGATGAAGAGGTGCTGAAGAAGAACTGCCCGCATATCGTCGTGGGGACTCCAGGCCGTATCCTAGCCCTGGCTCGAAATAAGAGCCTCAACCTCAAACACATTAAACACTTTATTTTGGATGAATGTGATAAGATGCTTGAACAGCTCGACATGCGTCGGGATGTCCAGGAAATTTTTCGCATGACCCCCCACGAGAAGCAGGTCATGATGTTCAGTGCTACCTTGAGCAAAGAGATCCGTCCAGTCTGCCGCAAGTTCATGCAAGATCCAATGGAGATCTTCGTGGATGATGAGACGAAGTTGACGCTGCATGGGTTGCAGCAGTACTACGTGAAACTGAAGGACAACGAGAAGAACCGGAAGCTCTTTGACCTTCTGGATGTCCTTGAGTTCAACCAGGTGGTGATCTTTGTGAAGTCTGTGCAGCGGTGCATTGCCTTGGCCCAGCTACTAGTGGAGCAGAACTTCCCAGCCATTGCCATCCACCGTGGGATGCCCCAGGAGGAGAGGCTTTCTCGGTATCAGCAGTTTAAAGATTTTCAACGACGAATTCTTGTGGCTACCAACCTATTTGGCCGAGGCATGGACATCGAGCGGGTGAACATTGCTTTTAATTATGACATGCCTGAGGATTCTGACACCTACCTGCATCGGGTGGCCAGAGCAGGCCGGTTTGGCACCAAGGGCTTGGCTATCACATTTGTGTCCGATGAGAATGATGCCAAGATCCTCAATGATGTGCAGGATCGCTTTGAGGTCAATATTAGTGAGCTGCCTGATGAGATAGACATCTCCTCCTACATTGAACAGACACGG +>ENST00000445122.5|chromosome:hg38:chr6_GL000250v2_alt:3481820:3484425:- 
+ATGTCATCCGAAAAGTCAGGAGACTCGCTTCGAGGGCCCACTTcccccgccgccgcccgctgccgccgccccgcccccgccggcgccagccCAGACTGCCCAGGCCCCTGGCTTCGTGGTGCCCACGCACGCGGGGACTGTGGGCACGCTGCCGCTGGGGGGCTACGTAGCGCCCGGATACCCCCTGCAGCTGCAGCCTTGCACTGCTTACGTGCCGGTCTACCCGGTGGGCACGCCATATGCAGGCGGGACCCCGGGGGGAACAGGAGTGACCTCCACTCTCCCCCCGCCGCCCCAGGGCCCAGGGCTGGCCCTACTGGAGCCGAGGCGCCCGCCACACGACTACATGCCCATCGCGGTGCTGACCACCATCTGTTGCTTCTGGCCTACTGGCATCATTGCCATCTTCAAGGCCGTGCAGGTGCGCACGGCCTTGGCCCGCGGAGACATGGTGTCGGCCGAGATCGCTTCACGCGAGGCCCGGAACTTCTCCTTCATCTCCCTGGCCGTGGGCATCGCGGCCATGGTGCTCTGTACCATCCTCACCGTAGTCATCATCATCGCCGCGCAGCACCACGAGAACTACTGGGATCCCTA +>ENST00000430777.2|chromosome:hg38:chr6_GL000250v2_alt:3503057:3504078:- +ATGGAtgctgctgctgctgctcttcctgctgctgctcttcctgctgcCCACCCTGTGGTTCTGCAGCCCCAGTGCCAAGTACTTCTTCAAGATGGCCTTCTACAATGGCTGGATCCTCTTCCTGGCTGTGCTCGCCATCCCTGTGTGTGCCGTGCGAGGACGCAACGTCGAGAACATGAAGATCTTGCGTCTAATGCTGCTCCACATCAAATACCTGTACGGGATCCGAGTGGAGGTGCGAGGGGCTCACCACTTCCCTCCCTCGCAGCCCTATGTTGTTGTCTCCAACCACCAGAGCTCTCTCGATCTGCTTGGGATGATGGAGGTACTGCCAGGCCGCTGTGTGCCCATTGCCAAGCGCGAGCTACTGTGggctggctctgccgggctggcctgctggctggcaggAGTCATCTTCATCGACCGGAAGCGCACGGGGGATGCCATCAGTGTCATGTC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/peptide_fasta_out.fasta Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,8 @@ +>ENST00000432222.1|chromosome:hg38:chr6_GL000250v2_alt:938210:938801:- +MAQNQASVLKNQAPVTRTQAPITGTLCQDARSNSHPVKPSRLNVFCCPHCSLTFSKKSYLSRHQKAHLTEPPNYCFHCSKSFSSFSRLVRHQQTHWKQKSYLCPICDLSFGEKEGLMDHWRGYKGKDLCQSSHHKCRVILGQWLGFSHDVPTMAGEEWKHGGDQSPPRIHTPRRRGLREKACKGDKTKEAVSILKHK +>ENST00000448800.5|chromosome:hg38:chr6_GL000250v2_alt:2862985:2871389:- +MCHTRELAFQISKEYERFSKYMPNVKVAVFFGGLSIKKDEEVLKKNCPHIVVGTPGRILALARNKSLNLKHIKHFILDECDKMLEQLDMRRDVQEIFRMTPHEKQVMMFSATLSKEIRPVCRKFMQDPMEIFVDDETKLTLHGLQQYYVKLKDNEKNRKLFDLLDVLEFNQVVIFVKSVQRCIALAQLLVEQNFPAIAIHRGMPQEERLSRYQQFKDFQRRILVATNLFGRGMDIERVNIAFNYDMPEDSDTYLHRVARAGRFGTKGLAITFVSDENDAKILNDVQDRFEVNISELPDEIDISSYIEQTR +>ENST00000445122.5|chromosome:hg38:chr6_GL000250v2_alt:3481820:3484425:- +MSSEKSGDSLRGPTSPAAARCRRPAPAGASPDCPGPWLRGAHARGDCGHAAAGGLRSARIPPAAAALHCLRAGLPGGHAICRRDPGGNRSDLHSPPAAPGPRAGPTGAEAPATRLHAHRGADHHLLLLAYWHHCHLQGRAGAHGLGPRRHGVGRDRFTRGPELLLHLPGRGHRGHGALYHPHRSHHHRRAAPRELLGSL +>ENST00000430777.2|chromosome:hg38:chr6_GL000250v2_alt:3503057:3504078:- +MDAAAAALPAAALPAAHPVVLQPQCQVLLQDGLLQWLDPLPGCARHPCVCRARTQRREHEDLASNAAPHQIPVRDPSGGARGSPLPSLAALCCCLQPPELSRSAWDDGGTARPLCAHCQARATVGWLCRAGLLAGRSHLHRPEAHGGCHQCHV
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/twobit.loc Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that is used by some +#tools. The twobit.loc file has this format (white space characters +#are TAB characters): +# +#<Build> <FullPathToFile> +# +#So, for example, if you had droPer1 twobit files stored in +#/depot/data2/galaxy/droPer1/, then the twobit.loc entry +#would look like this: +# +#droPer1 /depot/data2/galaxy/droPer1/droPer1.2bit +# +#and your /depot/data2/galaxy/droPer1/ directory would +#contain all of your twobit files (e.g.): +# +#-rw-rw-r-- 1 nate galaxy 48972650 2007-05-04 11:27 droPer1.2bit +#...etc... +# +#Your twobit.loc file should include an entry per line for each twobit +#file you have stored. For example: +# +#droPer1 /depot/data2/galaxy/droPer1/droPer1.2bit +#apiMel2 /depot/data2/galaxy/apiMel2/apiMel2.2bit +#droAna1 /depot/data2/galaxy/droAna1/droAna1.2bit +#droAna2 /depot/data2/galaxy/droAna2/droAna2.2bit +#...etc...
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/twobit.loc.sample Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that is used by some +#tools. The twobit.loc file has this format (white space characters +#are TAB characters): +# +#<Build> <FullPathToFile> +# +#So, for example, if you had droPer1 twobit files stored in +#/depot/data2/galaxy/droPer1/, then the twobit.loc entry +#would look like this: +# +#droPer1 /depot/data2/galaxy/droPer1/droPer1.2bit +# +#and your /depot/data2/galaxy/droPer1/ directory would +#contain all of your twobit files (e.g.): +# +#-rw-rw-r-- 1 nate galaxy 48972650 2007-05-04 11:27 droPer1.2bit +#...etc... +# +#Your twobit.loc file should include an entry per line for each twobit +#file you have stored. For example: +# +#droPer1 /depot/data2/galaxy/droPer1/droPer1.2bit +#apiMel2 /depot/data2/galaxy/apiMel2/apiMel2.2bit +#droAna1 /depot/data2/galaxy/droAna1/droAna1.2bit +#droAna2 /depot/data2/galaxy/droAna2/droAna2.2bit +#...etc...
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of reference FASTA files. Each all_fasta.loc entry has four TAB-separated fields, as described in all_fasta.loc.sample: unique build id, dbkey, display name, and file path. --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> + <!-- Locations of genome indexes in the 2bit format. Each twobit.loc entry has two TAB-separated fields, as described in twobit.loc.sample: build and full path to the .2bit file. The tool's "Locally cached genome" option lists this table and hands the selected entry's path column to find_nested_alt_orfs.py as its twobit argument. --> + <table name="twobit" comment_char="#" allow_duplicate_entries="False"> + <columns>value, path</columns> + <file path="tool-data/twobit.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Mon Apr 11 20:37:19 2022 +0000 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of reference FASTA files, read from the test-data copy of all_fasta.loc. NOTE(review): entries presumably follow the four TAB-separated fields documented in all_fasta.loc.sample (unique build id, dbkey, display name, file path); confirm against the test-data file. --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/all_fasta.loc" /> + </table> + <!-- Locations of genome indexes in the 2bit format, read from test-data/twobit.loc. Each entry has two TAB-separated fields: build and full path to the .2bit file. The tool's "Locally cached genome" option lists this table and hands the selected entry's path column to find_nested_alt_orfs.py as its twobit argument. --> + <table name="twobit" comment_char="#" allow_duplicate_entries="False"> + <columns>value, path</columns> + <file path="${__HERE__}/test-data/twobit.loc" /> + </table> +</tables>