Repository 'codon_freq_from_bicodons'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/codon_freq_from_bicodons

Changeset 0:cc5a776acd47 (2022-04-11)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/naltorfs/ commit cbedf7b5968b45a08df88d4ad799951d6f50a2bd"
added:
codon_freq_from_bicodons.xml
macros.xml
test-data/aa_freq_from_bicodon.tabular
test-data/all_fasta.loc
test-data/bed_out.bed
test-data/bicodon_out.tabular
test-data/cds_fasta_out.fasta
test-data/codon_freq_from_bicodon.tabular
test-data/codon_freq_from_bicodon_log.txt
test-data/codon_out.tabular
test-data/find_naltorfs_log.txt
test-data/gencode_canonical.hg38.chr6_GL0002508.bed
test-data/hg38.chr6_GL000250v2_alt.2bit.gz
test-data/naltorfs_fasta_out.fasta
test-data/peptide_fasta_out.fasta
test-data/twobit.loc
tool-data/all_fasta.loc.sample
tool-data/twobit.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r cc5a776acd47 codon_freq_from_bicodons.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/codon_freq_from_bicodons.xml Mon Apr 11 20:36:16 2022 +0000
[
@@ -0,0 +1,74 @@
+<tool id="codon_freq_from_bicodons" name="Get Codon frequency" version="@TOOL_VERSION@" profile="20.05">
+    <description>from bicodons</description>
+    <!-- The @TOOL_VERSION@ token and the edam_ontology, requirements and citations macros are imported from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <version_command>codon_freq_from_bicodons.py --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        codon_freq_from_bicodons.py
+
+            --bicodons '$bicodons'
+
+            --taxid '$taxid'
+
+            --organelle '$organelle'
+
+            --out '$out'
+
+            --aa_out '$aa_out'
+
+            ## Standard output is redirected only when a log was requested;
+            ## the "log" output dataset is filtered on the same write_log flag.
+            #if $write_log:
+                > '$log'
+            #end if
+    ]]></command>
+    <inputs>
+        <param argument="--bicodons" type="data" format="tabular" label="Bicodon rates input" help="Formatted like CoCoPUTs."/>
+        <param argument="--taxid" type="text" value="9606" label="Taxonomy ID of interest" help="9606 is Homo sapiens (human)"/>
+        <param argument="--organelle" type="text" value="genomic" label="Organelle of interest" help="genomic is default"/>
+        <!-- Not passed to the script as an option: it only toggles the stdout redirect and the log output. -->
+        <param name="write_log" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Create log file"/>
+    </inputs>
+    <outputs>
+        <data name="out" format="tabular" label="Get Codon frequency on ${on_string}: codon counts"/>
+        <data name="aa_out" format="tabular" label="Get Codon frequency on ${on_string}: amino acid counts"/>
+        <!-- Created only when write_log is enabled; receives the script's standard output. -->
+        <data name="log" format="txt" label="Get Codon frequency on ${on_string}: log">
+            <filter>write_log</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Uses the default taxid/organelle with logging enabled, so all three outputs are expected. -->
+        <test expect_num_outputs="3">
+            <param name="bicodons" value="bicodon_out.tabular"/>
+            <param name="taxid" value="9606"/>
+            <param name="organelle" value="genomic"/>
+            <param name="write_log" value="true"/>
+            <output name="out" value="codon_freq_from_bicodon.tabular"/>
+            <output name="aa_out" value="aa_freq_from_bicodon.tabular"/>
+            <output name="log" value="codon_freq_from_bicodon_log.txt"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Get Codon frequency from bicodons.
+
+Input format should match that provided by CoCoPUTs (https://dnahive.fda.gov/dna.cgi?cmd=codon_usage&id=537&mode=cocoputs; e.g. https://dnahive.fda.gov/dna.cgi?cmd=objFile&ids=537&filename=Refseq_Bicod.tsv&raw=1).
+
+Input row of interest is selected by the combination of the provided taxid and organelle.
+
+Translation table identifiers are based upon NCBI standards (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi).
+
+Example of CoCoPUTs style bicodon input, the majority of bicodons (codon pairs) have been removed in this example, for brevity:
+
+  +-----------+-----------+-------+--------------+-----------+-------------------+-------+---------------+--------+--------+--------+--------+-----+
+  | Division  | Assembly  | Taxid | Species      | Organelle | Translation Table | # CDS | # Codon Pairs | aaaaaa | aaaaac | aaaaag | aaaaat | ... |
+  +===========+===========+=======+==============+===========+===================+=======+===============+========+========+========+========+=====+
+  | custom    | hg38      | 9606  | Homo sapiens | genomic   | 1                 | 4     | 859           | 0      | 0      |     0  | 1      | ... |
+  +-----------+-----------+-------+--------------+-----------+-------------------+-------+---------------+--------+--------+--------+--------+-----+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r cc5a776acd47 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,68 @@
+<macros>
+    <!-- Single version string; it also pins the naltorfs package requirement below. -->
+    <token name="@TOOL_VERSION@">0.1.2</token>
+    <!-- Package dependency; the yield slot accepts extra requirement elements from the expanding tool. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">naltorfs</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- EDAM topic and operation annotations. -->
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_0622</edam_topic>
+            <edam_topic>topic_0091</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_0286</edam_operation>
+            <edam_operation>operation_0284</edam_operation>
+            <edam_operation>operation_0436</edam_operation>
+            <edam_operation>operation_0362</edam_operation>
+        </edam_operations>
+    </xml>
+    <!-- BibTeX citation, recorded as an unpublished manuscript. -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @UNPUBLISHED{nAltORFs,
+                    author = "Kommireddy Vasu and Debjit Khan and Iyappan Ramachandiran and Daniel Blankenberg and Paul L. Fox",
+                    title = "Nested Alternate Open Reading Frames and their Encoded Proteins: The Hidden Orfeome",
+                    year = "2022"
+                }
+            </citation>
+        </citations>
+    </xml>
+    <!-- Translation table selector; each option value is the table number shown in its label, with Table 1 preselected. -->
+    <xml name="translation_table_select_parameter">
+        <param argument="--translation_table" type="select" label="Translation table">
+            <option selected="true" value="1">Table 1 Standard, SGC0</option>
+            <option value="2">Table 2 Vertebrate Mitochondrial, SGC1</option>
+            <option value="3">Table 3 Yeast Mitochondrial, SGC2</option>
+            <option value="4">Table 4 Mold Mitochondrial, Protozoan Mitochondrial, Coelenterate Mitochondrial, Mycoplasma, Spiroplasma, SGC3</option>
+            <option value="5">Table 5 Invertebrate Mitochondrial, SGC4</option>
+            <option value="6">Table 6 Ciliate Nuclear, Dasycladacean Nuclear, Hexamita Nuclear, SGC5</option>
+            <option value="9">Table 9 Echinoderm Mitochondrial, Flatworm Mitochondrial, SGC8</option>
+            <option value="10">Table 10 Euplotid Nuclear, SGC9</option>
+            <option value="11">Table 11 Bacterial, Archaeal, Plant Plastid</option>
+            <option value="12">Table 12 Alternative Yeast Nuclear</option>
+            <option value="13">Table 13 Ascidian Mitochondrial</option>
+            <option value="14">Table 14 Alternative Flatworm Mitochondrial</option>
+            <option value="15">Table 15 Blepharisma Macronuclear</option>
+            <option value="16">Table 16 Chlorophycean Mitochondrial</option>
+            <option value="21">Table 21 Trematode Mitochondrial</option>
+            <option value="22">Table 22 Scenedesmus obliquus Mitochondrial</option>
+            <option value="23">Table 23 Thraustochytrium Mitochondrial</option>
+            <option value="24">Table 24 Pterobranchia Mitochondrial</option>
+            <option value="25">Table 25 Candidate Division SR1, Gracilibacteria</option>
+            <option value="26">Table 26 Pachysolen tannophilus Nuclear</option>
+            <option value="27">Table 27 Karyorelict Nuclear</option>
+            <option value="28">Table 28 Condylostoma Nuclear</option>
+            <option value="29">Table 29 Mesodinium Nuclear</option>
+            <option value="30">Table 30 Peritrich Nuclear</option>
+            <option value="31">Table 31 Blastocrithidia Nuclear</option>
+            <option value="32">Table 32 Balanophoraceae Plastid</option>
+            <option value="33">Table 33 Cephalodiscidae Mitochondrial</option>
+        </param>
+    </xml>
+</macros>
b
diff -r 000000000000 -r cc5a776acd47 test-data/aa_freq_from_bicodon.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aa_freq_from_bicodon.tabular Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,5 @@
+#frame aa_count * A C D E F G H I K L M N P Q R S T V W Y
+2-counts 855 20 74 27 11 20 27 52 15 42 18 95 31 20 81 20 72 86 59 44 26 15
+2-percent 855 2.3391812865497074724885351316 8.6549707602339189804752095370 3.1578947368421053099041273526 1.2865497076023393319132992474 2.3391812865497074724885351316 3.1578947368421053099041273526 6.0818713450292394284701913421 1.7543859649122806043664013487 4.9122807017543861363151336263 2.1052631578947367252396816184 11.1111111111111107163651467999 3.6257309941520468044018343790 2.3391812865497074724885351316 9.4736842105263168178908017580 2.3391812865497074724885351316 8.4210526315789469009587264736 10.0584795321637425757899109158 6.9005847953216372658857835631 5.1461988304093573276531969896 3.0409356725146197142350956710 1.7543859649122806043664013487
+3-counts 855 42 62 55 22 41 15 71 28 12 6 69 7 6 104 42 70 91 42 27 28 15
+3-percent 855 4.9122807017543861363151336263 7.2514619883040936088036687579 6.4327485380116957713880765368 2.5730994152046786638265984948 4.7953216374269000965568920947 1.7543859649122806043664013487 8.3040935672514617493789046421 3.2748538011695909055731590342 1.4035087719298244834931210789 0.7017543859649122417465605395 8.0701754385964914462192609790 0.8187134502923977263932897586 0.7017543859649122417465605395 12.1637426900584788569403826841 4.9122807017543861363151336263 8.1871345029239765977990828105 10.6432748538011701100458594738 4.9122807017543861363151336263 3.1578947368421053099041273526 3.2748538011695909055731590342 1.7543859649122806043664013487
b
diff -r 000000000000 -r cc5a776acd47 test-data/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r cc5a776acd47 test-data/bed_out.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bed_out.bed Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,4 @@
+chr6_GL000250v2_alt 938210 938801 ENST00000432222.1 0 - 938210 938801 12,12,120 1 591 0 ENST00000432222.1_f2_40_3 MAQNQASVLKNQAPVTRTQAPITGTLCQDARSNSHPVKPSRLNVFCCPHCSLTFSKKSYLSRHQKAHLTEPPNYCFHCSKSFSSFSRLVRHQQTHWKQKSYLCPICDLSFGEKEGLMDHWRGYKGKDLCQSSHHKCRVILGQWLGFSHDVPTMAGEEWKHGGDQSPPRIHTPRRRGLREKACKGDKTKEAVSILKHK unique hg38 . . . . . . . . .
+chr6_GL000250v2_alt 2862985 2871389 ENST00000448800.5 0 - 2862985 2871389 12,12,120 8 14,148,145,110,132,119,184,78 0,342,616,859,2343,4930,6063,8326 ENST00000448800.5_f3_53_3 MCHTRELAFQISKEYERFSKYMPNVKVAVFFGGLSIKKDEEVLKKNCPHIVVGTPGRILALARNKSLNLKHIKHFILDECDKMLEQLDMRRDVQEIFRMTPHEKQVMMFSATLSKEIRPVCRKFMQDPMEIFVDDETKLTLHGLQQYYVKLKDNEKNRKLFDLLDVLEFNQVVIFVKSVQRCIALAQLLVEQNFPAIAIHRGMPQEERLSRYQQFKDFQRRILVATNLFGRGMDIERVNIAFNYDMPEDSDTYLHRVARAGRFGTKGLAITFVSDENDAKILNDVQDRFEVNISELPDEIDISSYIEQTR unique hg38 . . . . . . . . .
+chr6_GL000250v2_alt 3481820 3484425 ENST00000445122.5 0 - 3481820 3484425 12,12,120 4 176,186,216,19 0,314,1145,2586 ENST00000445122.5_f3_80_1 MSSEKSGDSLRGPTSPAAARCRRPAPAGASPDCPGPWLRGAHARGDCGHAAAGGLRSARIPPAAAALHCLRAGLPGGHAICRRDPGGNRSDLHSPPAAPGPRAGPTGAEAPATRLHAHRGADHHLLLLAYWHHCHLQGRAGAHGLGPRRHGVGRDRFTRGPELLLHLPGRGHRGHGALYHPHRSHHHRRAAPRELLGSL unique hg38 . . . . . . . . .
+chr6_GL000250v2_alt 3503057 3504078 ENST00000430777.2 0 - 3503057 3504078 12,12,120 3 145,134,180 0,481,841 ENST00000430777.2_f3_20_373 MDAAAAALPAAALPAAHPVVLQPQCQVLLQDGLLQWLDPLPGCARHPCVCRARTQRREHEDLASNAAPHQIPVRDPSGGARGSPLPSLAALCCCLQPPELSRSAWDDGGTARPLCAHCQARATVGWLCRAGLLAGRSHLHRPEAHGGCHQCHV unique hg38 . . . . . . . . .
b
diff -r 000000000000 -r cc5a776acd47 test-data/bicodon_out.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bicodon_out.tabular Mon Apr 11 20:36:16 2022 +0000
b
b'@@ -0,0 +1,2 @@\n+Division\tAssembly\tTaxid\tSpecies\tOrganelle\tTranslation Table\t# CDS\t# Codon Pairs\taaaaaa\taaaaac\taaaaag\taaaaat\taaaaca\taaaacc\taaaacg\taaaact\taaaaga\taaaagc\taaaagg\taaaagt\taaaata\taaaatc\taaaatg\taaaatt\taaacaa\taaacac\taaacag\taaacat\taaacca\taaaccc\taaaccg\taaacct\taaacga\taaacgc\taaacgg\taaacgt\taaacta\taaactc\taaactg\taaactt\taaagaa\taaagac\taaagag\taaagat\taaagca\taaagcc\taaagcg\taaagct\taaagga\taaaggc\taaaggg\taaaggt\taaagta\taaagtc\taaagtg\taaagtt\taaataa\taaatac\taaatag\taaatat\taaatca\taaatcc\taaatcg\taaatct\taaatga\taaatgc\taaatgg\taaatgt\taaatta\taaattc\taaattg\taaattt\taacaaa\taacaac\taacaag\taacaat\taacaca\taacacc\taacacg\taacact\taacaga\taacagc\taacagg\taacagt\taacata\taacatc\taacatg\taacatt\taaccaa\taaccac\taaccag\taaccat\taaccca\taacccc\taacccg\taaccct\taaccga\taaccgc\taaccgg\taaccgt\taaccta\taacctc\taacctg\taacctt\taacgaa\taacgac\taacgag\taacgat\taacgca\taacgcc\taacgcg\taacgct\taacgga\taacggc\taacggg\taacggt\taacgta\taacgtc\taacgtg\taacgtt\taactaa\taactac\taactag\taactat\taactca\taactcc\taactcg\taactct\taactga\taactgc\taactgg\taactgt\taactta\taacttc\taacttg\taacttt\taagaaa\taagaac\taagaag\taagaat\taagaca\taagacc\taagacg\taagact\taagaga\taagagc\taagagg\taagagt\taagata\taagatc\taagatg\taagatt\taagcaa\taagcac\taagcag\taagcat\taagcca\taagccc\taagccg\taagcct\taagcga\taagcgc\taagcgg\taagcgt\taagcta\taagctc\taagctg\taagctt\taaggaa\taaggac\taaggag\taaggat\taaggca\taaggcc\taaggcg\taaggct\taaggga\taagggc\taagggg\taagggt\taaggta\taaggtc\taaggtg\taaggtt\taagtaa\taagtac\taagtag\taagtat\taagtca\taagtcc\taagtcg\taagtct\taagtga\taagtgc\taagtgg\taagtgt\taagtta\taagttc\taagttg\taagttt\taataaa\taataac\taataag\taataat\taataca\taatacc\taatacg\taatact\taataga\taatagc\taatagg\taatagt\taatata\taatatc\taatatg\taatatt\taatcaa\taatcac\taatcag\taatcat\taatcca\taatccc\taatccg\taatcct\taatcga\taatcgc\taatcgg\taatcgt\taatcta\taatctc\taatctg\taatctt\taatgaa\taatgac\taatgag\taatgat\taatgca\taatgcc\taatgcg\taatgct\taatgga\taatggc\taatggg\taatggt\taatg
ta\taatgtc\taatgtg\taatgtt\taattaa\taattac\taattag\taattat\taattca\taattcc\taattcg\taattct\taattga\taattgc\taattgg\taattgt\taattta\taatttc\taatttg\taatttt\tacaaaa\tacaaac\tacaaag\tacaaat\tacaaca\tacaacc\tacaacg\tacaact\tacaaga\tacaagc\tacaagg\tacaagt\tacaata\tacaatc\tacaatg\tacaatt\tacacaa\tacacac\tacacag\tacacat\tacacca\tacaccc\tacaccg\tacacct\tacacga\tacacgc\tacacgg\tacacgt\tacacta\tacactc\tacactg\tacactt\tacagaa\tacagac\tacagag\tacagat\tacagca\tacagcc\tacagcg\tacagct\tacagga\tacaggc\tacaggg\tacaggt\tacagta\tacagtc\tacagtg\tacagtt\tacataa\tacatac\tacatag\tacatat\tacatca\tacatcc\tacatcg\tacatct\tacatga\tacatgc\tacatgg\tacatgt\tacatta\tacattc\tacattg\tacattt\taccaaa\taccaac\taccaag\taccaat\taccaca\taccacc\taccacg\taccact\taccaga\taccagc\taccagg\taccagt\taccata\taccatc\taccatg\taccatt\tacccaa\tacccac\tacccag\tacccat\tacccca\taccccc\taccccg\tacccct\tacccga\tacccgc\tacccgg\tacccgt\tacccta\taccctc\taccctg\taccctt\taccgaa\taccgac\taccgag\taccgat\taccgca\taccgcc\taccgcg\taccgct\taccgga\taccggc\taccggg\taccggt\taccgta\taccgtc\taccgtg\taccgtt\tacctaa\tacctac\tacctag\tacctat\tacctca\tacctcc\tacctcg\tacctct\tacctga\tacctgc\tacctgg\tacctgt\tacctta\taccttc\taccttg\taccttt\tacgaaa\tacgaac\tacgaag\tacgaat\tacgaca\tacgacc\tacgacg\tacgact\tacgaga\tacgagc\tacgagg\tacgagt\tacgata\tacgatc\tacgatg\tacgatt\tacgcaa\tacgcac\tacgcag\tacgcat\tacgcca\tacgccc\tacgccg\tacgcct\tacgcga\tacgcgc\tacgcgg\tacgcgt\tacgcta\tacgctc\tacgctg\tacgctt\tacggaa\tacggac\tacggag\tacggat\tacggca\tacggcc\tacggcg\tacggct\tacggga\tacgggc\tacgggg\tacgggt\tacggta\tacggtc\tacggtg\tacggtt\tacgtaa\tacgtac\tacgtag\tacgtat\tacgtca\tacgtcc\tacgtcg\tacgtct\tacgtga\tacgtgc\tacgtgg\tacgtgt\tacgtta\tacgttc\tacgttg\tacgttt\tactaaa\tactaac\tactaag\tactaat\tactaca\tactacc\tactacg\tactact\tactaga\tactagc\tactagg\tactagt\tactata\tactatc\tactatg\tactatt\tactcaa\tactcac\tactcag\tactcat\tactcca\tactccc\tactccg\tactcct\tactcga\tactcgc\tactcgg\tactcgt\tactcta\tactctc\tactctg\tactctt\tactgaa\tactgac\tactgag\tactgat\tactgca\tactgcc\tactg
cg\tactgct\tactgga\tactggc\tactggg\tactggt\tactgta\tactgtc\tactgtg\tactgtt\tacttaa\tacttac\tacttag\tacttat\tacttca\tacttcc\tacttcg\tacttct\tacttga\tacttgc\tacttgg\tacttgt\tacttta\tactttc\tactttg\tactttt\tagaaaa\tagaaac\tagaaag\tagaaat\tagaaca\tagaacc\tagaacg\tagaact\tagaaga\tagaagc\tagaagg\tagaagt\tagaata\tagaatc\tagaatg\tagaatt\tagacaa\tagacac\tagacag\tagacat\tagacca\tagaccc\tagaccg\tagacct\tagacga\tagacgc\tagacgg\tagacgt\tagacta\tagactc\tagactg\tagactt\tagagaa\tagagac\tagagag\tagagat\tagagca\tagagcc\tagagcg\tagagct\tagagga\tagaggc\tagaggg\tagaggt\tagagta\taga'..b'\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t1\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t2\t2\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t2\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t1\t0\t0\t0\t0\t1\t0\t3\t1\t0\t0\t1\t0\t0\t0\t1\t0\t1\t2\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t1\t1\t0\t0\t1\t1\t0\t1\t0\t1\t1\t1\t0\t1\t0\t0\t1\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t1\t0\t1\t1\t0\t2\t0\t0\t0\t0\t0\t1\t2\t0\t3\t2\t0\t2\t0\t1\t1\t1\t0\t0\t0\t3\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t2\t0\t1\t0\t1\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\t2\t1\t1\t0\t1\t1\t0\t1\t0\t3\t2\t2\t1\t1\t0\t0\t0\t1\t1\t0\t1\t0\t1\t0\t0\t0\t1\t0\t0\t3\t0\t1\t0\t1\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t1\t0\t0\t0\t0\t1\t2\t0\t1\t0\t0\t1\t2\t0\t7\t0\t3\t2\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0
\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t1\t0\t0\t0\t0\t0\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t1\t1\t0\t1\t2\t1\t2\t0\t1\t3\t1\t0\t1\t3\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t1\t1\t0\t1\t1\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t2\t0\t1\t0\t2\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t2\t0\t0\t0\t0\t0\t1\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\t0\t1\t0\t1\t2\t2\t0\t0\t1\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t1\t1\t2\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t2\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t1\t0\t1\t0\t1\t0\t1\t1\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t
0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t2\t0\t2\t0\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t1\t2\t0\t1\t2\t1\t1\t1\t1\t0\t2\t2\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t2\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t1\t0\t1\t0\t1\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\
t0\t0\t0\t0\t1\t0\t0\t1\t0\t1\t0\t0\t1\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t1\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t1\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t2\t1\t1\t0\t0\t2\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n'
b
diff -r 000000000000 -r cc5a776acd47 test-data/cds_fasta_out.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cds_fasta_out.fasta Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,8 @@
+>ENST00000432222.1
+GGCCAGGTCCCAGGAACCCATATTTAGAACTGAGGGTCCTATGGCCCAGAACCAGGCATCTGTACTTAAGAACCAAGCACCTGTGACCAGGACCCAGGCACCCATCACTGGAACCCTCTGTCAGGATGCCAGATCCAACTCTCATCCAGTGAAGCCCTCAAGACTCAATGTCTTCTGTTGCCCCCATTGTTCTTTGACTTTTAGCAAGAAATCCTATCTCTCCAGACACCAGAAGGCCCACCTCACAGAGCCGCCCAACTACTGCTTCCATTGCAGCAAGTCTTTCAGCTCATTTTCCAGGCTGGTCAGACACCAGCAGACCCACTGGAAGCAGAAGAGCTACCTTTGCCCTATCTGTGACCTCTCCTTTGGGGAGAAAGAGGGCCTTATGGATCACTGGAGGGGCTATAAAGGCAAGGACCTGTGCCAGAGCAGCCACCATAAATGCCGGGTGATCCTGGGCCAGTGGCTTGGCTTCTCTCATGATGTCCCCACTATGGCTGGGGAGGAATGGAAGCATGGAGGTGATCAATCTCCCCCCAGGATCCATACCCCCAGGAGAAGAGGCCTAAGAGAGAAGGCCTGCAAAGGAGACAAAACAAAGGAGGCAGTGAGCATCTTGAAACATAAATA
+>ENST00000448800.5
+TGGCCACACTGCAACAGCTGGAGCCAGTTACTGGGCAGGTGTCTGTACTGGTGATGTGTCACACTCGGGAGTTGGCTTTTCAGATCAGCAAGGAATATGAGCGCTTCTCTAAATACATGCCCAATGTCAAGGTTGCTGTTTTTTTTGGTGGTCTGTCTATCAAGAAGGATGAAGAGGTGCTGAAGAAGAACTGCCCGCATATCGTCGTGGGGACTCCAGGCCGTATCCTAGCCCTGGCTCGAAATAAGAGCCTCAACCTCAAACACATTAAACACTTTATTTTGGATGAATGTGATAAGATGCTTGAACAGCTCGACATGCGTCGGGATGTCCAGGAAATTTTTCGCATGACCCCCCACGAGAAGCAGGTCATGATGTTCAGTGCTACCTTGAGCAAAGAGATCCGTCCAGTCTGCCGCAAGTTCATGCAAGATCCAATGGAGATCTTCGTGGATGATGAGACGAAGTTGACGCTGCATGGGTTGCAGCAGTACTACGTGAAACTGAAGGACAACGAGAAGAACCGGAAGCTCTTTGACCTTCTGGATGTCCTTGAGTTCAACCAGGTGGTGATCTTTGTGAAGTCTGTGCAGCGGTGCATTGCCTTGGCCCAGCTACTAGTGGAGCAGAACTTCCCAGCCATTGCCATCCACCGTGGGATGCCCCAGGAGGAGAGGCTTTCTCGGTATCAGCAGTTTAAAGATTTTCAACGACGAATTCTTGTGGCTACCAACCTATTTGGCCGAGGCATGGACATCGAGCGGGTGAACATTGCTTTTAATTATGACATGCCTGAGGATTCTGACACCTACCTGCATCGGGTGGCCAGAGCAGGCCGGTTTGGCACCAAGGGCTTGGCTATCACATTTGTGTCCGATGAGAATGATGCCAAGATCCTCAATGATGTGCAGGATCGCTTTGAGGTCAATATTAGTGAGCTGCCTGATGAGATAGACATCTCCTCCTACATTGAACAGACACGGT
+>ENST00000445122.5
+ATGCCGGGCACCCAGACTCCAGCACCGGCCGAGGACCCCCACTCCGGCTGCAGGGACCCTGTCCCAGCGAGACCGCAGGCATGTCATCCGAAAAGTCAGGAGACTCGCTTCGAGGGCCCACTTcccccgccgccgcccgctgccgccgccccgcccccgccggcgccagccCAGACTGCCCAGGCCCCTGGCTTCGTGGTGCCCACGCACGCGGGGACTGTGGGCACGCTGCCGCTGGGGGGCTACGTAGCGCCCGGATACCCCCTGCAGCTGCAGCCTTGCACTGCTTACGTGCCGGTCTACCCGGTGGGCACGCCATATGCAGGCGGGACCCCGGGGGGAACAGGAGTGACCTCCACTCTCCCCCCGCCGCCCCAGGGCCCAGGGCTGGCCCTACTGGAGCCGAGGCGCCCGCCACACGACTACATGCCCATCGCGGTGCTGACCACCATCTGTTGCTTCTGGCCTACTGGCATCATTGCCATCTTCAAGGCCGTGCAGGTGCGCACGGCCTTGGCCCGCGGAGACATGGTGTCGGCCGAGATCGCTTCACGCGAGGCCCGGAACTTCTCCTTCATCTCCCTGGCCGTGGGCATCGCGGCCATGGTGCTCTGTACCATCCTCACCGTAGTCATCATCATCGCCGCGCAGCACCACGAGAACTACTGGGATCCCTAA
+>ENST00000430777.2
+ATGGATTTGTGGCCAGGGGCATGGAtgctgctgctgctgctcttcctgctgctgctcttcctgctgcCCACCCTGTGGTTCTGCAGCCCCAGTGCCAAGTACTTCTTCAAGATGGCCTTCTACAATGGCTGGATCCTCTTCCTGGCTGTGCTCGCCATCCCTGTGTGTGCCGTGCGAGGACGCAACGTCGAGAACATGAAGATCTTGCGTCTAATGCTGCTCCACATCAAATACCTGTACGGGATCCGAGTGGAGGTGCGAGGGGCTCACCACTTCCCTCCCTCGCAGCCCTATGTTGTTGTCTCCAACCACCAGAGCTCTCTCGATCTGCTTGGGATGATGGAGGTACTGCCAGGCCGCTGTGTGCCCATTGCCAAGCGCGAGCTACTGTGggctggctctgccgggctggcctgctggctggcaggAGTCATCTTCATCGACCGGAAGCGCACGGGGGATGCCATCAGTGTCATGTCTGAGGTCGCCCAGACCCTGCTCACCCAGGACGTGAGGGTCTGGGTGTTTCCTGAGGGAACGAGAAACCACAATGGCTCCATGCTGCCCTTCAAACGTGGCGCCTTCCATCTTGCAGTGCAGGCCCAGGTTCCCATTGTCCCCATAGTCATGTCCTCCTACCAAGACTTCTACTGCAAGAAGGAGCGTCGCTTCACCTCGGGACAATGTCAGGTGCGGGTGCTGCCCCCAGTGCCCACGGAAGGGCTGACACCAGATGACGTCCCAGCTCTGGCTGACAGAGTCCGGCACTCCATGCTCACTGTTTTCCGGGAAATCTCCACTGATGGCCGGGGTGGTGGTGACTATCTGAAGAAGCCTGGGGGCGGTGGGTGA
b
diff -r 000000000000 -r cc5a776acd47 test-data/codon_freq_from_bicodon.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/codon_freq_from_bicodon.tabular Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,5 @@
+#frame codon_count aaa aac aag aat aca acc acg act aga agc agg agt ata atc atg att caa cac cag cat cca ccc ccg cct cga cgc cgg cgt cta ctc ctg ctt gaa gac gag gat gca gcc gcg gct gga ggc ggg ggt gta gtc gtg gtt taa tac tag tat tca tcc tcg tct tga tgc tgg tgt tta ttc ttg ttt
+2-counts 855 4 12 14 8 13 23 8 15 28 18 25 12 8 27 31 7 3 11 17 4 23 28 16 14 4 12 2 1 10 16 35 7 4 7 16 4 16 38 6 14 15 15 18 4 4 11 25 4 1 12 3 3 18 15 7 16 16 17 26 10 8 20 19 7
+2-percent 855 0.4678362573099415500088582576 1.4035087719298244834931210789 1.6374269005847954527865795171 0.9356725146198831000177165151 1.5204678362573098571175478355 2.6900584795321638154064203263 0.9356725146198831000177165151 1.7543859649122806043664013487 3.2748538011695909055731590342 2.1052631578947367252396816184 2.9239766081871341185660639894 1.4035087719298244834931210789 0.9356725146198831000177165151 3.1578947368421053099041273526 3.6257309941520468044018343790 0.8187134502923977263932897586 0.3508771929824561208732802697 1.2865497076023393319132992474 1.9883040935672515736598597869 0.4678362573099415500088582576 2.6900584795321638154064203263 3.2748538011695909055731590342 1.8713450292397662000354330303 1.6374269005847954527865795171 0.4678362573099415500088582576 1.4035087719298244834931210789 0.2339181286549707750044291288 0.1169590643274853875022145644 1.1695906432748537362442675658 1.8713450292397662000354330303 4.0935672514619882988995414053 0.8187134502923977263932897586 0.4678362573099415500088582576 0.8187134502923977263932897586 1.8713450292397662000354330303 0.4678362573099415500088582576 1.8713450292397662000354330303 4.4444444444444446418174266000 0.7017543859649122417465605395 1.6374269005847954527865795171 1.7543859649122806043664013487 1.7543859649122806043664013487 2.1052631578947367252396816184 0.4678362573099415500088582576 0.4678362573099415500088582576 1.2865497076023393319132992474 2.9239766081871341185660639894 0.4678362573099415500088582576 0.1169590643274853875022145644 1.4035087719298244834931210789 0.3508771929824561208732802697 0.3508771929824561208732802697 2.1052631578947367252396816184 1.7543859649122806043664013487 0.8187134502923977263932897586 1.8713450292397662000354330303 1.8713450292397662000354330303 1.9883040935672515736598597869 3.0409356725146197142350956710 1.1695906432748537362442675658 0.9356725146198831000177165151 2.3391812865497074724885351316 2.2222222222222223209087133000 0.8187134502923977263932897586
+3-counts 855 3 2 3 4 12 10 10 10 17 12 16 5 2 7 7 3 23 10 19 18 37 23 19 25 7 16 10 4 8 14 19 18 27 10 14 12 23 12 8 19 31 18 11 11 3 7 8 9 11 6 4 9 22 15 17 20 27 36 28 19 1 5 9 10
+3-percent 855 0.3508771929824561208732802697 0.2339181286549707750044291288 0.3508771929824561208732802697 0.4678362573099415500088582576 1.4035087719298244834931210789 1.1695906432748537362442675658 1.1695906432748537362442675658 1.1695906432748537362442675658 1.9883040935672515736598597869 1.4035087719298244834931210789 1.8713450292397662000354330303 0.5847953216374268681221337829 0.2339181286549707750044291288 0.8187134502923977263932897586 0.8187134502923977263932897586 0.3508771929824561208732802697 2.6900584795321638154064203263 1.1695906432748537362442675658 2.2222222222222223209087133000 2.1052631578947367252396816184 4.3274853801169594902376047685 2.6900584795321638154064203263 2.2222222222222223209087133000 2.9239766081871341185660639894 0.8187134502923977263932897586 1.8713450292397662000354330303 1.1695906432748537362442675658 0.4678362573099415500088582576 0.9356725146198831000177165151 1.6374269005847954527865795171 2.2222222222222223209087133000 2.1052631578947367252396816184 3.1578947368421053099041273526 1.1695906432748537362442675658 1.6374269005847954527865795171 1.4035087719298244834931210789 2.6900584795321638154064203263 1.4035087719298244834931210789 0.9356725146198831000177165151 2.2222222222222223209087133000 3.6257309941520468044018343790 2.1052631578947367252396816184 1.2865497076023393319132992474 1.2865497076023393319132992474 0.3508771929824561208732802697 0.8187134502923977263932897586 0.9356725146198831000177165151 1.0526315789473683626198408092 1.2865497076023393319132992474 0.7017543859649122417465605395 0.4678362573099415500088582576 1.0526315789473683626198408092 2.5730994152046786638265984948 1.7543859649122806043664013487 1.9883040935672515736598597869 2.3391812865497074724885351316 3.1578947368421053099041273526 4.2105263157894734504793632368 3.2748538011695909055731590342 2.2222222222222223209087133000 0.1169590643274853875022145644 0.5847953216374268681221337829 1.0526315789473683626198408092 1.1695906432748537362442675658
b
diff -r 000000000000 -r cc5a776acd47 test-data/codon_freq_from_bicodon_log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/codon_freq_from_bicodon_log.txt Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,7 @@
+Using 4096 codon pairs
+Reporting 64 codons
+Found a total of 100.000000 percent for 2.
+Found a total of 100.000000 percent for 3.
+Reporting 21 amino acids
+Found a total of 100.000000 percent for 2-aa.
+Found a total of 100.000000 percent for 3-aa.
b
diff -r 000000000000 -r cc5a776acd47 test-data/codon_out.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/codon_out.tabular Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,2 @@
+Division Assembly Taxid Species Organelle Translation Table # CDS # Codons aaa aac aag aat aca acc acg act aga agc agg agt ata atc atg att caa cac cag cat cca ccc ccg cct cga cgc cgg cgt cta ctc ctg ctt gaa gac gag gat gca gcc gcg gct gga ggc ggg ggt gta gtc gtg gtt taa tac tag tat tca tcc tcg tct tga tgc tgg tgt tta ttc ttg ttt
+custom hg38 9606 Homo sapiens genomic 1 4 859 14 13 29 8 5 10 4 9 9 12 13 6 4 17 17 9 12 21 28 33 13 26 7 14 13 14 15 12 12 17 20 25 13 16 26 27 12 33 7 30 9 28 16 12 3 12 21 3 0 9 0 5 5 12 1 15 0 20 9 14 1 11 11 17
b
diff -r 000000000000 -r cc5a776acd47 test-data/find_naltorfs_log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/find_naltorfs_log.txt Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,7 @@
+{'chr6_GL000250v2_alt': 4672374}
+Excluding 77 unique canonical coding sequences from 77 BED lines.
+cds length is not divisible by 3!!!: 634. chr6_GL000250v2_alt 938190 938841 ENST00000432222.1 0 - 938207 938841 12,12,120 1 651 0
+cds length is not divisible by 3!!!: 271. chr6_GL000250v2_alt 2057211 2060006 ENST00000420604.1 0 - 2057618 2060006 12,12,120 3 437,165,76 0,2313,2719
+cds length is not divisible by 3!!!: 986. chr6_GL000250v2_alt 2862767 2871733 ENST00000448800.5 0 - 2862982 2871733 12,12,120 9 232,148,145,110,132,119,184,93,38 0,560,834,1077,2561,5148,6281,8544,8928
+cds length is not divisible by 3!!!: 580. chr6_GL000250v2_alt 3655068 3704125 ENST00000425722.1 0 - 3655068 3703952 12,12,120 19 21,21,24,21,21,21,21,21,21,39,90,21,21,21,30,33,33,87,186 0,637,1109,8114,9561,12963,13438,14141,16499,17121,26913,32289,32814,43342,43448,45110,45965,47083,48871
+transcripts: 77 unique LNO ORFs: 4
b
diff -r 000000000000 -r cc5a776acd47 test-data/gencode_canonical.hg38.chr6_GL0002508.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gencode_canonical.hg38.chr6_GL0002508.bed Mon Apr 11 20:36:16 2022 +0000
b
b'@@ -0,0 +1,77 @@\n+chr6_GL000250v2_alt\t168585\t189574\tENST00000437160.6\t0\t-\t169653\t189217\t12,12,120\t8\t1664,27,33,116,23,231,96,777,\t0,4234,5806,5971,8695,17011,18891,20212,\n+chr6_GL000250v2_alt\t260408\t270885\tENST00000426120.2\t0\t-\t260624\t269578\t12,12,120\t7\t1802,105,127,92,82,269,253,\t0,3949,4702,5171,8728,9161,10224,\n+chr6_GL000250v2_alt\t309837\t310865\tENST00000421027.1\t0\t-\t309837\t310800\t12,12,120\t1\t1028,\t0,\n+chr6_GL000250v2_alt\t351929\t352936\tENST00000420178.3\t0\t-\t351929\t352871\t12,12,120\t1\t1007,\t0,\n+chr6_GL000250v2_alt\t377510\t378446\tENST00000428081.1\t0\t+\t377510\t378446\t12,12,120\t1\t936,\t0,\n+chr6_GL000250v2_alt\t438928\t440200\tENST00000440124.4\t0\t+\t439261\t440200\t12,12,120\t1\t1272,\t0,\n+chr6_GL000250v2_alt\t572355\t573321\tENST00000420495.4\t0\t+\t572355\t573321\t12,12,120\t1\t966,\t0,\n+chr6_GL000250v2_alt\t691168\t722731\tENST00000435232.5\t0\t-\t692358\t693306\t12,12,120\t5\t2229,48,125,124,85,\t0,3408,4796,6359,31478,\n+chr6_GL000250v2_alt\t704969\t706618\tENST00000455234.1\t0\t+\t705679\t706618\t12,12,120\t1\t1649,\t0,\n+chr6_GL000250v2_alt\t722840\t729989\tENST00000453467.5\t0\t+\t727430\t728381\t12,12,120\t4\t27,116,51,2830,\t0,1273,3224,4319,\n+chr6_GL000250v2_alt\t752426\t753563\tENST00000418003.3\t0\t-\t752426\t753563\t12,12,120\t1\t1137,\t0,\n+chr6_GL000250v2_alt\t821306\t825621\tENST00000414436.2\t0\t-\t821574\t825397\t12,12,120\t2\t739,251,\t0,4064,\n+chr6_GL000250v2_alt\t821306\t893756\tENST00000449163.5\t0\t-\t869267\t893509\t12,12,120\t19\t739,176,144,129,128,94,108,117,133,151,78,64,243,192,66,102,171,135,553,\t0,47959,48912,49278,49987,50809,51316,51521,53020,53648,55343,56970,57662,65507,66158,66510,67711,68296,71897,\n+chr6_GL000250v2_alt\t853147\t854731\tENST00000453513.4\t0\t+\t853637\t854576\t12,12,120\t1\t1584,\t0,\n+chr6_GL000250v2_alt\t922689\t938078\tENST00000548017.1\t0\t+\t922918\t936889\t12,12,120\t7\t317,348,114,21,21,21,1203,\t0,2336,9166,10657,10918,13715,14186,\n+chr6_GL
000250v2_alt\t938190\t938841\tENST00000432222.1\t0\t-\t938207\t938841\t12,12,120\t1\t651,\t0,\n+chr6_GL000250v2_alt\t1093432\t1096751\tENST00000615098.4\t0\t+\t1093456\t1095994\t12,12,120\t7\t97,270,276,276,117,33,372,\t0,226,722,1597,1995,2557,2947,\n+chr6_GL000250v2_alt\t1147347\t1267669\tENST00000640829.1\t0\t+\t1147490\t1266858\t12,12,120\t11\t216,16,14,240,276,276,33,84,93,35,281,\t0,340,39005,53186,53667,54522,68203,118979,119506,119955,120041,\n+chr6_GL000250v2_alt\t1319351\t1323001\tENST00000443494.6\t0\t+\t1319622\t1322789\t12,12,120\t4\t416,101,110,237,\t0,663,970,3413,\n+chr6_GL000250v2_alt\t1324800\t1328425\tENST00000441553.5\t0\t+\t1326768\t1327398\t12,12,120\t3\t301,109,1230,\t0,1881,2395,\n+chr6_GL000250v2_alt\t1328357\t1333979\tENST00000450015.2\t0\t-\t1329202\t1333881\t12,12,120\t4\t1426,92,23,665,\t0,2891,3190,4957,\n+chr6_GL000250v2_alt\t1368538\t1371196\tENST00000422484.1\t0\t-\t1368538\t1370895\t12,12,120\t4\t231,96,500,43,\t0,1199,1940,2615,\n+chr6_GL000250v2_alt\t1394223\t1406831\tENST00000433713.5\t0\t+\t1395132\t1405908\t12,12,120\t6\t62,649,96,225,23,1011,\t0,605,9866,10857,11392,11597,\n+chr6_GL000250v2_alt\t1410040\t1419029\tENST00000449208.6\t0\t-\t1412064\t1418953\t12,12,120\t7\t2542,33,116,23,231,96,505,\t0,3782,4994,5424,6453,7204,8484,\n+chr6_GL000250v2_alt\t1421285\t1430769\tENST00000552062.2\t0\t+\t1421971\t1430429\t12,12,120\t8\t860,96,231,9,14,116,33,858,\t0,3970,5094,6035,6633,7295,7770,8626,\n+chr6_GL000250v2_alt\t1442505\t1471482\tENST00000425523.5\t0\t-\t1443926\t1457164\t12,12,120\t9\t2104,33,116,23,231,96,488,104,123,\t0,4719,4970,5565,12071,13984,14221,16590,28854,\n+chr6_GL000250v2_alt\t1656333\t1673225\tENST00000458607.5\t0\t+\t1658808\t1671755\t12,12,120\t8\t152,153,460,96,231,23,116,2018,\t0,2046,2468,3938,8907,13070,13667,14874,\n+chr6_GL000250v2_alt\t1658801\t1676354\tENST00000550581.3\t0\t+\t1658808\t1676306\t12,12,120\t10\t460,96,231,23,116,185,101,83,126,146,\t0,1470,6439,10602,11199,12406,15992,16185,17126,17407,
\n+chr6_GL000250v2_alt\t1870880\t1883062\tENST00000383450.3\t0\t-\t1875575\t1883062\t12,12,120\t7\t4937,141,163,179,195,96,208,\t0,5319,5735,5975,11091,11708,11974,\n+chr6_GL000250v2_alt\t1988977\t2002543\tENST00000450902.6\t0\t-\t1988977\t2002347\t12,12,120\t12\t153,99,211,116,111,192,204,255,57,163,239,403,\t0,264,560,3155,3439,5329,5599,6007,10938,11318,11'..b'947,3566,\n+chr6_GL000250v2_alt\t3195810\t3211661\tENST00000550401.2\t0\t-\t3196242\t3211595\t12,12,120\t20\t554,85,95,71,74,104,95,254,103,93,100,236,84,88,61,100,79,74,49,106,\t0,1457,1621,1815,2120,2303,2495,2678,5960,6350,7406,7615,8117,8277,11264,11733,12656,12860,13838,15745,\n+chr6_GL000250v2_alt\t3212373\t3230306\tENST00000421926.6\t0\t-\t3212698\t3230299\t12,12,120\t28\t506,176,79,87,116,78,145,168,155,105,102,123,107,83,183,236,144,149,102,115,135,156,41,85,254,219,67,49,\t0,914,1255,2940,3135,3603,4047,4633,4886,5153,6822,7016,7236,7818,8020,8282,8604,8840,9210,9469,9709,12652,12939,13071,16511,16854,17171,17884,\n+chr6_GL000250v2_alt\t3406267\t3418718\tENST00000433037.1\t0\t-\t3406267\t3418718\t12,12,120\t6\t306,333,306,291,297,369,\t0,5366,7700,8368,10745,12082,\n+chr6_GL000250v2_alt\t3480956\t3485682\tENST00000445122.5\t0\t-\t3481819\t3484701\t12,12,120\t6\t1040,186,216,91,81,112,\t0,1178,2009,3450,3737,4614,\n+chr6_GL000250v2_alt\t3497183\t3500883\tENST00000414111.6\t0\t+\t3498767\t3500558\t12,12,120\t9\t75,129,123,110,96,171,80,154,372,\t0,1556,1916,2119,2328,2505,2768,2921,3328,\n+chr6_GL000250v2_alt\t3500813\t3510698\tENST00000430777.2\t0\t-\t3501877\t3504098\t12,12,120\t7\t1237,73,96,176,134,209,96,\t0,1754,1953,2213,2725,3085,9789,\n+chr6_GL000250v2_alt\t3510955\t3513395\tENST00000449794.2\t0\t+\t3511113\t3512928\t12,12,120\t6\t298,19,113,55,118,565,\t0,1187,1297,1523,1655,1875,\n+chr6_GL000250v2_alt\t3513569\t3516924\tENST00000549839.5\t0\t-\t3513744\t3516824\t12,12,120\t5\t272,83,123,107,152,\t0,383,2660,2913,3203,\n+chr6_GL000250v2_alt\t3517334\t3522785\tENST00000453487.2\t0\t-\t35189
80\t3522514\t12,12,120\t9\t1739,87,89,154,136,191,248,74,492,\t0,1868,2077,2506,2911,3230,3521,3987,4959,\n+chr6_GL000250v2_alt\t3523364\t3528122\tENST00000457070.1\t0\t-\t3523964\t3525112\t12,12,120\t8\t738,200,103,59,84,81,93,98,\t0,943,1382,1706,2275,3544,3971,4660,\n+chr6_GL000250v2_alt\t3527441\t3536999\tENST00000425600.1\t0\t-\t3528035\t3536999\t12,12,120\t12\t1308,98,148,296,139,82,220,176,384,524,113,253,\t0,1481,2082,2456,3579,3807,4084,6000,6286,7245,8938,9305,\n+chr6_GL000250v2_alt\t3655068\t3704125\tENST00000425722.1\t0\t-\t3655068\t3703952\t12,12,120\t19\t21,21,24,21,21,21,21,21,21,39,90,21,21,21,30,33,33,87,186,\t0,637,1109,8114,9561,12963,13438,14141,16499,17121,26913,32289,32814,43342,43448,45110,45965,47083,48871,\n+chr6_GL000250v2_alt\t3726193\t3739359\tENST00000445928.5\t0\t-\t3726199\t3739352\t12,12,120\t7\t95,282,348,21,282,348,86,\t0,781,2076,7820,8970,10974,13080,\n+chr6_GL000250v2_alt\t3823888\t3837634\tENST00000426847.2\t0\t-\t3824827\t3837535\t12,12,120\t6\t953,24,111,282,270,199,\t0,1752,2249,3043,5625,13547,\n+chr6_GL000250v2_alt\t4047519\t4053351\tENST00000449560.5\t0\t+\t4047581\t4052533\t12,12,120\t4\t144,249,282,973,\t0,3778,4410,4859,\n+chr6_GL000250v2_alt\t4062236\t4069674\tENST00000419685.6\t0\t-\t4062591\t4069610\t12,12,120\t6\t369,24,111,282,267,161,\t0,1184,1676,2753,5563,7277,\n+chr6_GL000250v2_alt\t4117786\t4122072\tENST00000426644.6\t0\t-\t4118239\t4121975\t12,12,120\t6\t489,32,111,282,270,188,\t0,677,960,1557,2281,4098,\n+chr6_GL000250v2_alt\t4126846\t4143590\tENST00000439425.6\t0\t-\t4127302\t4143099\t12,12,120\t12\t486,137,160,174,189,129,198,206,131,115,497,75,\t0,7419,7949,8286,8637,10352,10646,13173,13662,15556,15760,16669,\n+chr6_GL000250v2_alt\t4145581\t4149568\tENST00000436627.2\t0\t-\t4145823\t4149278\t12,12,120\t6\t331,205,130,112,148,425,\t0,814,1417,1955,2225,3562,\n+chr6_GL000250v2_alt\t4150073\t4158843\tENST00000418205.2\t0\t-\t4150443\t4158681\t12,12,120\t11\t577,137,163,174,189,129,198,206,131,115,940,\t0,185
9,2304,2710,3443,3781,5109,5733,6900,7179,7830,\n+chr6_GL000250v2_alt\t4159070\t4184429\tENST00000425855.2\t0\t+\t4159094\t4184317\t12,12,120\t6\t84,68,130,142,93,156,\t0,1932,3799,4158,22841,25203,\n+chr6_GL000250v2_alt\t4239473\t4245915\tENST00000395312.7\t0\t-\t4239815\t4245652\t12,12,120\t6\t359,36,117,285,282,318,\t0,713,907,2543,4055,6124,\n+chr6_GL000250v2_alt\t4369248\t4385378\tENST00000443117.5\t0\t-\t4373329\t4378232\t12,12,120\t6\t779,167,282,246,199,31,\t0,4069,4450,5072,8884,16099,\n+chr6_GL000250v2_alt\t4380587\t4391853\tENST00000411749.6\t0\t+\t4380703\t4390890\t12,12,120\t6\t216,264,282,111,24,663,\t0,4752,9014,9843,10283,10603,\n'
b
diff -r 000000000000 -r cc5a776acd47 test-data/hg38.chr6_GL000250v2_alt.2bit.gz
b
Binary file test-data/hg38.chr6_GL000250v2_alt.2bit.gz has changed
b
diff -r 000000000000 -r cc5a776acd47 test-data/naltorfs_fasta_out.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/naltorfs_fasta_out.fasta Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,8 @@
+>ENST00000432222.1|chromosome:hg38:chr6_GL000250v2_alt:938210:938801:-
+ATGGCCCAGAACCAGGCATCTGTACTTAAGAACCAAGCACCTGTGACCAGGACCCAGGCACCCATCACTGGAACCCTCTGTCAGGATGCCAGATCCAACTCTCATCCAGTGAAGCCCTCAAGACTCAATGTCTTCTGTTGCCCCCATTGTTCTTTGACTTTTAGCAAGAAATCCTATCTCTCCAGACACCAGAAGGCCCACCTCACAGAGCCGCCCAACTACTGCTTCCATTGCAGCAAGTCTTTCAGCTCATTTTCCAGGCTGGTCAGACACCAGCAGACCCACTGGAAGCAGAAGAGCTACCTTTGCCCTATCTGTGACCTCTCCTTTGGGGAGAAAGAGGGCCTTATGGATCACTGGAGGGGCTATAAAGGCAAGGACCTGTGCCAGAGCAGCCACCATAAATGCCGGGTGATCCTGGGCCAGTGGCTTGGCTTCTCTCATGATGTCCCCACTATGGCTGGGGAGGAATGGAAGCATGGAGGTGATCAATCTCCCCCCAGGATCCATACCCCCAGGAGAAGAGGCCTAAGAGAGAAGGCCTGCAAAGGAGACAAAACAAAGGAGGCAGTGAGCATCTTGAAACATAAA
+>ENST00000448800.5|chromosome:hg38:chr6_GL000250v2_alt:2862985:2871389:-
+ATGTGTCACACTCGGGAGTTGGCTTTTCAGATCAGCAAGGAATATGAGCGCTTCTCTAAATACATGCCCAATGTCAAGGTTGCTGTTTTTTTTGGTGGTCTGTCTATCAAGAAGGATGAAGAGGTGCTGAAGAAGAACTGCCCGCATATCGTCGTGGGGACTCCAGGCCGTATCCTAGCCCTGGCTCGAAATAAGAGCCTCAACCTCAAACACATTAAACACTTTATTTTGGATGAATGTGATAAGATGCTTGAACAGCTCGACATGCGTCGGGATGTCCAGGAAATTTTTCGCATGACCCCCCACGAGAAGCAGGTCATGATGTTCAGTGCTACCTTGAGCAAAGAGATCCGTCCAGTCTGCCGCAAGTTCATGCAAGATCCAATGGAGATCTTCGTGGATGATGAGACGAAGTTGACGCTGCATGGGTTGCAGCAGTACTACGTGAAACTGAAGGACAACGAGAAGAACCGGAAGCTCTTTGACCTTCTGGATGTCCTTGAGTTCAACCAGGTGGTGATCTTTGTGAAGTCTGTGCAGCGGTGCATTGCCTTGGCCCAGCTACTAGTGGAGCAGAACTTCCCAGCCATTGCCATCCACCGTGGGATGCCCCAGGAGGAGAGGCTTTCTCGGTATCAGCAGTTTAAAGATTTTCAACGACGAATTCTTGTGGCTACCAACCTATTTGGCCGAGGCATGGACATCGAGCGGGTGAACATTGCTTTTAATTATGACATGCCTGAGGATTCTGACACCTACCTGCATCGGGTGGCCAGAGCAGGCCGGTTTGGCACCAAGGGCTTGGCTATCACATTTGTGTCCGATGAGAATGATGCCAAGATCCTCAATGATGTGCAGGATCGCTTTGAGGTCAATATTAGTGAGCTGCCTGATGAGATAGACATCTCCTCCTACATTGAACAGACACGG
+>ENST00000445122.5|chromosome:hg38:chr6_GL000250v2_alt:3481820:3484425:-
+ATGTCATCCGAAAAGTCAGGAGACTCGCTTCGAGGGCCCACTTcccccgccgccgcccgctgccgccgccccgcccccgccggcgccagccCAGACTGCCCAGGCCCCTGGCTTCGTGGTGCCCACGCACGCGGGGACTGTGGGCACGCTGCCGCTGGGGGGCTACGTAGCGCCCGGATACCCCCTGCAGCTGCAGCCTTGCACTGCTTACGTGCCGGTCTACCCGGTGGGCACGCCATATGCAGGCGGGACCCCGGGGGGAACAGGAGTGACCTCCACTCTCCCCCCGCCGCCCCAGGGCCCAGGGCTGGCCCTACTGGAGCCGAGGCGCCCGCCACACGACTACATGCCCATCGCGGTGCTGACCACCATCTGTTGCTTCTGGCCTACTGGCATCATTGCCATCTTCAAGGCCGTGCAGGTGCGCACGGCCTTGGCCCGCGGAGACATGGTGTCGGCCGAGATCGCTTCACGCGAGGCCCGGAACTTCTCCTTCATCTCCCTGGCCGTGGGCATCGCGGCCATGGTGCTCTGTACCATCCTCACCGTAGTCATCATCATCGCCGCGCAGCACCACGAGAACTACTGGGATCCCTA
+>ENST00000430777.2|chromosome:hg38:chr6_GL000250v2_alt:3503057:3504078:-
+ATGGAtgctgctgctgctgctcttcctgctgctgctcttcctgctgcCCACCCTGTGGTTCTGCAGCCCCAGTGCCAAGTACTTCTTCAAGATGGCCTTCTACAATGGCTGGATCCTCTTCCTGGCTGTGCTCGCCATCCCTGTGTGTGCCGTGCGAGGACGCAACGTCGAGAACATGAAGATCTTGCGTCTAATGCTGCTCCACATCAAATACCTGTACGGGATCCGAGTGGAGGTGCGAGGGGCTCACCACTTCCCTCCCTCGCAGCCCTATGTTGTTGTCTCCAACCACCAGAGCTCTCTCGATCTGCTTGGGATGATGGAGGTACTGCCAGGCCGCTGTGTGCCCATTGCCAAGCGCGAGCTACTGTGggctggctctgccgggctggcctgctggctggcaggAGTCATCTTCATCGACCGGAAGCGCACGGGGGATGCCATCAGTGTCATGTC
b
diff -r 000000000000 -r cc5a776acd47 test-data/peptide_fasta_out.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/peptide_fasta_out.fasta Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,8 @@
+>ENST00000432222.1|chromosome:hg38:chr6_GL000250v2_alt:938210:938801:-
+MAQNQASVLKNQAPVTRTQAPITGTLCQDARSNSHPVKPSRLNVFCCPHCSLTFSKKSYLSRHQKAHLTEPPNYCFHCSKSFSSFSRLVRHQQTHWKQKSYLCPICDLSFGEKEGLMDHWRGYKGKDLCQSSHHKCRVILGQWLGFSHDVPTMAGEEWKHGGDQSPPRIHTPRRRGLREKACKGDKTKEAVSILKHK
+>ENST00000448800.5|chromosome:hg38:chr6_GL000250v2_alt:2862985:2871389:-
+MCHTRELAFQISKEYERFSKYMPNVKVAVFFGGLSIKKDEEVLKKNCPHIVVGTPGRILALARNKSLNLKHIKHFILDECDKMLEQLDMRRDVQEIFRMTPHEKQVMMFSATLSKEIRPVCRKFMQDPMEIFVDDETKLTLHGLQQYYVKLKDNEKNRKLFDLLDVLEFNQVVIFVKSVQRCIALAQLLVEQNFPAIAIHRGMPQEERLSRYQQFKDFQRRILVATNLFGRGMDIERVNIAFNYDMPEDSDTYLHRVARAGRFGTKGLAITFVSDENDAKILNDVQDRFEVNISELPDEIDISSYIEQTR
+>ENST00000445122.5|chromosome:hg38:chr6_GL000250v2_alt:3481820:3484425:-
+MSSEKSGDSLRGPTSPAAARCRRPAPAGASPDCPGPWLRGAHARGDCGHAAAGGLRSARIPPAAAALHCLRAGLPGGHAICRRDPGGNRSDLHSPPAAPGPRAGPTGAEAPATRLHAHRGADHHLLLLAYWHHCHLQGRAGAHGLGPRRHGVGRDRFTRGPELLLHLPGRGHRGHGALYHPHRSHHHRRAAPRELLGSL
+>ENST00000430777.2|chromosome:hg38:chr6_GL000250v2_alt:3503057:3504078:-
+MDAAAAALPAAALPAAHPVVLQPQCQVLLQDGLLQWLDPLPGCARHPCVCRARTQRREHEDLASNAAPHQIPVRDPSGGARGSPLPSLAALCCCLQPPELSRSAWDDGGTARPLCAHCQARATVGWLCRAGLLAGRSHLHRPEAHGGCHQCHV
b
diff -r 000000000000 -r cc5a776acd47 test-data/twobit.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/twobit.loc Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that is used by some
+#tools.  The twobit.loc file has this format (white space characters 
+#are TAB characters):
+#
+#<Build>    <FullPathToFile>
+#
+#So, for example, if you had droPer1 twobit files stored in 
+#/depot/data2/galaxy/droPer1/, then the twobit.loc entry 
+#would look like this:
+#
+#droPer1    /depot/data2/galaxy/droPer1/droPer1.2bit
+#
+#and your /depot/data2/galaxy/droPer1/ directory would 
+#contain all of your twobit files (e.g.):
+#
+#-rw-rw-r--   1 nate   galaxy 48972650 2007-05-04 11:27 droPer1.2bit
+#...etc...
+#
+#Your twobit.loc file should include an entry per line for each twobit 
+#file you have stored.  For example:
+#
+#droPer1    /depot/data2/galaxy/droPer1/droPer1.2bit
+#apiMel2    /depot/data2/galaxy/apiMel2/apiMel2.2bit
+#droAna1    /depot/data2/galaxy/droAna1/droAna1.2bit
+#droAna2    /depot/data2/galaxy/droAna2/droAna2.2bit
+#...etc...
b
diff -r 000000000000 -r cc5a776acd47 tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r cc5a776acd47 tool-data/twobit.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/twobit.loc.sample Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that is used by some
+#tools.  The twobit.loc file has this format (white space characters 
+#are TAB characters):
+#
+#<Build>    <FullPathToFile>
+#
+#So, for example, if you had droPer1 twobit files stored in 
+#/depot/data2/galaxy/droPer1/, then the twobit.loc entry 
+#would look like this:
+#
+#droPer1    /depot/data2/galaxy/droPer1/droPer1.2bit
+#
+#and your /depot/data2/galaxy/droPer1/ directory would 
+#contain all of your twobit files (e.g.):
+#
+#-rw-rw-r--   1 nate   galaxy 48972650 2007-05-04 11:27 droPer1.2bit
+#...etc...
+#
+#Your twobit.loc file should include an entry per line for each twobit 
+#file you have stored.  For example:
+#
+#droPer1    /depot/data2/galaxy/droPer1/droPer1.2bit
+#apiMel2    /depot/data2/galaxy/apiMel2/apiMel2.2bit
+#droAna1    /depot/data2/galaxy/droAna1/droAna1.2bit
+#droAna2    /depot/data2/galaxy/droAna2/droAna2.2bit
+#...etc...
b
diff -r 000000000000 -r cc5a776acd47 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Reference FASTA locations: rows come from tool-data/all_fasta.loc with columns value, dbkey, name, path -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <!-- 2bit index locations: rows come from tool-data/twobit.loc with columns value, path; duplicate entries are disallowed -->
+    <table name="twobit" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, path</columns>
+        <file path="tool-data/twobit.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r cc5a776acd47 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Mon Apr 11 20:36:16 2022 +0000
b
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Reference FASTA locations for tests: rows come from the ${__HERE__}/test-data/all_fasta.loc fixture (value, dbkey, name, path) -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+    <!-- 2bit index locations for tests: rows come from the ${__HERE__}/test-data/twobit.loc fixture (value, path); duplicate entries are disallowed -->
+    <table name="twobit" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, path</columns>
+        <file path="${__HERE__}/test-data/twobit.loc" />
+    </table>
+</tables>