Previous changeset 0:95ea8d97abb4 (2019-01-11) Next changeset 2:dd0cd2319ae5 (2021-11-19) |
Commit message:
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608" |
modified:
spaln.xml test-data/output1.bed12 test-data/output1.tabular test-data/output1_gff_genes.gff3 test-data/output1_gff_matches.gff3 |
added:
list_spaln_tables.py list_spaln_tables.xml macros.xml test-data/gnm2tab test-data/ncbi_taxonomy_sqlite.loc test-data/output2.tabular test-data/sqlite_taxdb/tax.ncbitaxonomy.sqlite tool-data/ncbi_taxonomy_sqlite.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 list_spaln_tables.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/list_spaln_tables.py Thu Jul 16 07:57:10 2020 -0400 |
[ |
#!/usr/bin/env python3
"""Rank spaln parameter tables by taxonomic distance from a query taxon.

Reads a spaln ``gnm2tab`` file and, for every species listed in it, asks the
external ``taxonomy_util`` program for the distance to the common ancestor
shared with the query taxon. One tab-separated line per species is written to
standard output; problems with individual species are reported as warnings on
standard error.
"""

import argparse
import sys
from subprocess import CompletedProcess, run
from typing import List, Optional, TextIO, Tuple


def build_distance_command(
    taxon: str, other_taxon: str, taxonomy_db_path: str, only_canonical: bool
) -> List[str]:
    """Return the ``taxonomy_util common_ancestor_distance`` argument vector.

    The command is assembled as a list rather than parsed from a formatted
    string, so taxon names containing quotes and database paths containing
    spaces reach ``taxonomy_util`` unchanged.
    """
    cmd = ["taxonomy_util", "-d", taxonomy_db_path, "common_ancestor_distance"]
    if only_canonical:
        cmd.append("--only_canonical")
    cmd.extend([other_taxon, taxon])
    return cmd


def parse_gnm2tab_line(line: str) -> Optional[Tuple[str, str, str]]:
    """Extract ``(species_code, settings, taxon_name)`` from a gnm2tab line.

    Only the first three tab-separated fields are used; each is stripped of
    surrounding whitespace. Returns ``None`` when the line has fewer than
    three fields (for example a blank line).
    """
    fields = line.split("\t")
    if len(fields) < 3:
        return None
    species_code, settings, other_taxon = (field.strip() for field in fields[:3])
    return species_code, settings, other_taxon


def find_common_ancestor_distance(
    taxon: str, other_taxon: str, taxonomy_db_path: str, only_canonical: bool
) -> CompletedProcess:
    """Run ``taxonomy_util common_ancestor_distance`` for a pair of taxa.

    Returns the completed process with stdout and stderr captured as text;
    the caller is responsible for inspecting them.
    """
    cmd = build_distance_command(taxon, other_taxon, taxonomy_db_path, only_canonical)
    return run(cmd, encoding="utf8", capture_output=True)


def find_distances(gnm2tab_file: TextIO, taxon: str, taxonomy_db_path: str) -> None:
    """Print one distance row per gnm2tab species to standard output.

    Each row holds, tab-separated: the first field of the non-canonical
    query's output, the full output of the canonical query, the species code,
    the settings name and the species' scientific name.

    Exits with an error message if ``taxonomy_util get_id`` reports that the
    query taxon is not in the database. Species whose lookup writes to stderr
    are skipped with a warning instead of aborting the run.
    """
    # Check the query taxon once up front so a misspelt name fails fast
    # instead of producing one warning per gnm2tab line.
    cmd = ["taxonomy_util", "-d", taxonomy_db_path, "get_id", taxon]
    proc = run(cmd, capture_output=True, encoding="utf8")
    if "not found in" in proc.stderr:
        sys.exit("Error: " + proc.stderr.strip())

    for line in gnm2tab_file:
        if not line.strip():
            continue
        record = parse_gnm2tab_line(line)
        if record is None:
            print(
                "Warning: skipping malformed gnm2tab line:",
                line.rstrip(),
                file=sys.stderr,
            )
            continue
        species_code, settings, other_taxon = record

        proc = find_common_ancestor_distance(taxon, other_taxon, taxonomy_db_path, True)
        ancestor_info = proc.stdout.rstrip()
        if proc.stderr:
            print("Warning:", other_taxon, proc.stderr.rstrip(), file=sys.stderr)
            continue

        proc = find_common_ancestor_distance(
            taxon, other_taxon, taxonomy_db_path, False
        )
        if proc.stderr:
            # Without this check a failed query would emit a row whose
            # distance column is empty.
            print("Warning:", other_taxon, proc.stderr.rstrip(), file=sys.stderr)
            continue
        non_canonical_distance = proc.stdout.split("\t")[0]
        print(
            non_canonical_distance,
            ancestor_info,
            species_code,
            settings,
            other_taxon,
            sep="\t",
        )


def main() -> None:
    """Parse the command line and list distances for the requested taxon."""
    parser = argparse.ArgumentParser(description="Find distance to common ancestor")
    parser.add_argument(
        "--taxonomy_db", required=True, help="NCBI Taxonomy database (SQLite format)"
    )
    parser.add_argument(
        "--gnm2tab_file",
        required=True,
        type=argparse.FileType(),
        help="gnm2tab file from spaln",
    )
    parser.add_argument("taxon")
    args = parser.parse_args()

    # argparse opened the file for us; make sure it is closed again.
    with args.gnm2tab_file:
        find_distances(args.gnm2tab_file, args.taxon, args.taxonomy_db)


if __name__ == "__main__":
    main()
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 list_spaln_tables.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/list_spaln_tables.xml Thu Jul 16 07:57:10 2020 -0400 |
[ |
@@ -0,0 +1,132 @@ +<tool id="list_spaln_tables" name="List spaln parameter tables" version="@TOOL_VERSION@+galaxy0"> + <description>Given a query species, list the spaln settings tables that exist, from closest related species to most different</description> + <macros> + <import>macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="3.8">python</requirement> + <requirement type="package" version="@TOOL_VERSION@">spaln</requirement> + <requirement type="package" version="1.0.7">rust-ncbitaxonomy</requirement> + </requirements> + <command detect_errors="aggressive"><![CDATA[ + #if $gnm2tab.gnm2tab_source == "spaln_tool" + SPALN_LOCATION=\$(dirname \$(dirname \$(which spaln))) && + GNM2TAB_PATH="\$SPALN_LOCATION/share/spaln/table/gnm2tab" && + if [[ -f "\$GNM2TAB_PATH" ]] ; then + ln -s "\$GNM2TAB_PATH" gnm2tab ; + else + echo "Cannot find builtin gnm2tab file \$SPALN_LOCATION : \$GNM2TAB_PATH" >&2 ; exit 1 ; + fi && + #else + ln -s '${gnm2tab.gnm2tab_file}' gnm2tab && + #end if + #if $taxonomy.taxonomy_source == "cached" + ln -s '${taxonomy.taxonomy_sqlite_table.fields.path}/tax.ncbitaxonomy.sqlite' tax.ncbitaxonomy.sqlite && + #else + ln -s '${taxonomy.taxonomy_sqlite_file}' tax.ncbitaxonomy.sqlite && + #end if + python '${__tool_directory__}/list_spaln_tables.py' --taxonomy_db tax.ncbitaxonomy.sqlite --gnm2tab_file gnm2tab '$taxon' | sort -k1n > '${output}' + ]]></command> + <inputs> + <param name="taxon" label="Scientific name" type="text" /> + <conditional name="gnm2tab"> + <param name="gnm2tab_source" type="select" label="Choose source of gnm2tab file"> + <option value="spaln_tool" selected="true">From the spaln tool installation</option> + <option value="history">From dataset in history</option> + </param> + <when value="spaln_tool" /> + <when value="history"> + <param name="gnm2tab_file" type="data" format="tabular" label="gnm2tab file from spaln" help="The gnm2tab file should be from the table directory of spaln v. 
@TOOL_VERSION@" /> + </when> + </conditional> + <conditional name="taxonomy"> + <param type="select" name="taxonomy_source"> + <option value="cached" selected="true">Use built-in NCBI Taxonomy SQLite database</option> + <option value="history">Use NCBI Taxonomy SQLite database from history</option> + </param> + <when value="cached"> + <param type="select" name="taxonomy_sqlite_table" label="NCBI Taxonomy SQLite database"> + <options from_data_table="ncbi_taxonomy_sqlite"> + <filter type="sort_by" column="1" /> + <validator type="no_options" message="No NCBI Taxonomy SQLite database is available" /> + </options> + </param> + </when> + <when value="history"> + <param name="taxonomy_sqlite_file" type="data" format="sqlite" label="NCBI Taxonomy SQLite database" /> + </when> + </conditional> + </inputs> + <outputs> + <data name="output" format="tabular"> + <actions> + <action name="column_names" type="metadata" default="dist_all,dist_canonical,common_ancestor,species_code,settings_group,scientific_name" /> + </actions> + </data> + </outputs> + + <tests> + <test> + <param name="taxon" value="Lates calcarifer" /> + <conditional name="gnm2tab"> + <param name="gnm2tab_source" value="spaln_tool" /> + </conditional> + <conditional name="taxonomy"> + <param name="taxonomy_source" value="history" /> + <param name="taxonomy_sqlite_file" ftype="sqlite" value="sqlite_taxdb/tax.ncbitaxonomy.sqlite" /> + </conditional> + <output name="output"> + <assert_contents> + <has_line_matching expression="5\t4\tActinopteri\tcynosemi\tFish_mdl\tCynoglossus semilaevis" /> + <has_line_matching expression="30\t7\tEukaryota\tzea_mays\tMagnolio\tZea mays" /> + </assert_contents> + </output> + </test> + <test> + <param name="taxon" value="Lates calcarifer" /> + <conditional name="gnm2tab"> + <param name="gnm2tab_source" value="spaln_tool" /> + </conditional> + <conditional name="taxonomy"> + <param name="taxonomy_source" value="cached" /> + <param name="taxonomy_sqlite_table" value="sample" /> + 
</conditional> + <output name="output"> + <assert_contents> + <has_line_matching expression="5\t4\tActinopteri\tcynosemi\tFish_mdl\tCynoglossus semilaevis" /> + <has_line_matching expression="30\t7\tEukaryota\tzea_mays\tMagnolio\tZea mays" /> + </assert_contents> + </output> + </test> + <test> + <param name="taxon" value="Lates calcarifer" /> + <conditional name="gnm2tab"> + <param name="gnm2tab_source" value="history" /> + <param name="gnm2tab_file" ftype="tabular" value="gnm2tab" /> + </conditional> + <conditional name="taxonomy"> + <param name="taxonomy_source" value="cached" /> + <param name="taxonomy_sqlite_table" value="sample" /> + </conditional> + <output name="output"> + <assert_contents> + <has_line_matching expression="5\t4\tActinopteri\tcynosemi\tFish_mdl\tCynoglossus semilaevis" /> + <has_line_matching expression="30\t7\tEukaryota\tzea_mays\tMagnolio\tZea mays" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ + Spaln has a number of pre-computed settings files to tune its predictions for different species. These + are listed in a file named gnm2tab_ in the package. This tool uses the NCBI Taxonomy database to search + that table for a suitable (i.e. taxonomically close) set of settings for optimising spaln's alignment + predictions. + + Input is the scientific name of a species (as reflected in the NCBI Taxonomy DB), output is + information from the gnm2tab file sorted by taxonomic distance from the query species. + + .. _gnm2tab: https://github.com/ogotoh/spaln/blob/master/table/gnm2tab + ]]></help> + <citations> + </citations> +</tool> |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jul 16 07:57:10 2020 -0400 |
b |
@@ -0,0 +1,3 @@ +<macros> + <token name="@TOOL_VERSION@">2.4.03</token> +</macros> \ No newline at end of file |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 spaln.xml --- a/spaln.xml Fri Jan 11 18:15:21 2019 -0500 +++ b/spaln.xml Thu Jul 16 07:57:10 2020 -0400 |
[ |
b'@@ -1,7 +1,7 @@\n <tool id="spaln" name="Spaln: align cDNA or Protein to genome" version="@TOOL_VERSION@+galaxy0">\n <description>Maps and aligns a set of cDNA or protein sequences onto a whole genomic sequence.</description>\n <macros>\n- <token name="@TOOL_VERSION@">2.3.2</token>\n+ <import>macros.xml</import>\n </macros>\n <edam_topics>\n <edam_topic>topic_3512</edam_topic>\n@@ -10,7 +10,36 @@\n <requirement type="package" version="@TOOL_VERSION@">spaln</requirement>\n </requirements>\n <command detect_errors="aggressive"><![CDATA[\n-\t spaln -t\\${GALAXY_SLOTS:-1} -O$format -o \'$output1\' \'$genome\' \'$query\'\n+\t spaln -t\\${GALAXY_SLOTS:-1} -O$format\n+ #if str($species_params).strip() != \'\'\n+ -T\'${species_params}\'\n+ #end if\n+ #if $adv.use == "yes"\n+ -S\'${adv.query_orientation}\'\n+ -V\'${adv.hirschberg_threshold}\'\n+ -pa\'${adv.polya_trim}\'\n+ ${adv.all_results}\n+ -yu\'${adv.gap_extension_penalty}\'\n+ -yv\'${adv.gap_open_penalty}\'\n+ -yw\'${adv.dp_matrix_scan_width}\'\n+ -ya\'${adv.splice_stringency}\'\n+ -yj\'${adv.gap_penalty_incline}\'\n+ -yk\'${adv.gap_penalty_flex}\'\n+ \'${adv.double_affine_gap}\'\n+ -ym\'${adv.match_score}\'\n+ -yn\'${adv.mismatch_score}\'\n+ -yo\'${adv.stop_codon_penalty}\'\n+ -yx\'${adv.frameshift_penalty}\'\n+ -yy\'${adv.splice_site_weight}\n+ -yz\'${adv.coding_potential_weight}\'\n+ -yB\'${adv.branch_point_weight}\n+ -yL\'${adv.min_intron_len}\'\n+ -yZ\'${adv.intron_potential_weight}\'\n+ #if str($adv.max_gene_length).strip() != \'\'\n+ -XG\'${adv.max_gene_length}\'\n+ #end if\n+ #end if\n+ \'$genome\' \'$query\' >\'$output1\' \n ]]></command>\n <inputs>\n <param type="data" name="genome" format="fasta" label="Genome sequence to search (FASTA format)" />\n@@ -20,7 +49,49 @@\n \t <option value="2">GFF3 format matches</option>\n \t <option value="3">BED format</option>\n \t <option value="4">Tabular format exon information</option>\n+ </param>\n+ <param argument="-T" name="species_params" type="text" 
optional="true" label="Species to use for parameter setting" help="Choose a species table (e.g. cynosemi) from which to read parameters to optimise spaln" />\n+ <conditional name="adv">\n+ <param type="select" name="use" label="Advanced settings">\n+ <option selected="true" value="no">No</option>\n+ <option value="yes">Yes</option>\n </param>\n+ <when value="no">\n+ </when>\n+ <when value="yes">\n+ <param argument="-S" name="query_orientation" type="select" label="DNA query orientation" help="Determines how to treat orientation of query sequence when searching">\n+ <option value="0">Infer orientation from sequence header (no poly-A/poly-T trimming)</option>\n+ <option value="1">Forward orientation only. Poly-A tail might be trimmed off</option>\n+ <option value="2">Reverse orientation only. Leading poly-T might be trimmed off</option>\n+ <option selected="true" value="3">Examine both orientations. Poly-A / Poly-T might be trimmed off</option>\n+ </param>\n+ <param argument="-V" name="hirschberg_threshold" type="integer" value="16777216" label="Minimum space to induce Hirschberg\'s algorithm" help="Default is 16M (16x1024x1024 bytes)" />\n+ <param argument="-pa" name="polya_trim" type="integer" value="12" label="Limit 3\' poly-As to this number of bases" help="poly-A/poly-T trimming is only done if -S (orientation) option is 0 or 3" />\n+ <param argument="-pw" name="all_results" type="boolean" checked="false" truevalue="-pw" falsevalue="" label="Report results even if the score is below the threshold" />\n+ <param argument="-yu" name="gap_exten'..b'ght" type="integer" value="0" label="Weight for branch point signal" />\n+ <param argument="-yL" name="min_intron_len" type="integer" value="30" label="Minimum expected length of intron" />\n+ <param argument="-yZ" name="intron_potential_weight" type="integer" value="0" label="Weight for intron potential" />\n+ <param argument="-XG" name="max_gene_length" type="text" label="Reset maximum expected gene size, suffix k or M 
is effective" />\n+ </when>\n+ </conditional>\n </inputs>\n <outputs>\n <data name="output1" format="tabular">\n@@ -49,32 +120,61 @@\n <param name="genome" ftype="fasta" value="genome.fasta" />\n <param name="query" ftype="fasta" value="query.fasta" />\n <param name="format" value="0"/>\n-\t <output name="output1" value="output1_gff_genes.gff3" />\n+ <conditional name="adv">\n+ <param name="use" value="no" />\n+ </conditional>\n+\t <output name="output1" ftype="gff3" value="output1_gff_genes.gff3" />\n </test>\n <test>\n <param name="genome" ftype="fasta" value="genome.fasta" />\n <param name="query" ftype="fasta" value="query.fasta" />\n <param name="format" value="2"/>\n-\t <output name="output1" value="output1_gff_matches.gff3" />\n+ <conditional name="adv">\n+ <param name="use" value="no" />\n+ </conditional>\n+\t <output name="output1" ftype="gff3" value="output1_gff_matches.gff3" />\n </test>\n <test>\n <param name="genome" ftype="fasta" value="genome.fasta" />\n <param name="query" ftype="fasta" value="query.fasta" />\n <param name="format" value="3"/>\n-\t <output name="output1" value="output1.bed12" />\n+ <conditional name="adv">\n+ <param name="use" value="no" />\n+ </conditional>\n+\t <output name="output1" ftype="bed12" value="output1.bed12" />\n </test>\n <test>\n <param name="genome" ftype="fasta" value="genome.fasta" />\n <param name="query" ftype="fasta" value="query.fasta" />\n <param name="format" value="4"/>\n-\t <output name="output1" value="output1.tabular" />\n+ <conditional name="adv">\n+ <param name="use" value="no" />\n+ </conditional>\n+\t <output name="output1" ftype="tabular" value="output1.tabular" />\n+ </test>\n+ <test>\n+ <param name="genome" ftype="fasta" value="genome.fasta" />\n+ <param name="query" ftype="fasta" value="query.fasta" />\n+ <param name="format" value="4"/>\n+ <param name="species_params" value="cynosemi" />\n+ <conditional name="adv">\n+ <param name="use" value="no" />\n+ </conditional>\n+\t <output name="output1" 
ftype="tabular" value="output2.tabular" />\n </test>\n </tests>\n <help><![CDATA[\n Spaln_ (space-efficient spliced alignment) is a stand-alone program that maps and aligns a set of cDNA or\n protein sequences onto a whole genomic sequence in a single job. \n \n- This Galaxy wrapper currently only supports the default (i.e. *-O3*) algorithm for Spaln with default parameters.\n+ This Galaxy wrapper currently only supports the default (i.e. *-O3*) algorithm for Spaln. This algorithm\n+ takes FASTA format query and genome sequence and finds an alignment of the query (either cDNA or protein)\n+ against the genome.\n+\n+ Spaln optionally takes a species name to use for parameter setting (the "-T" parameter). The \n+ "List spaln parameter tables" (list_spaln_tables) can be used to find a parameter file that is\n+ close (in terms of taxonomic distance) to your species of interest. Use of this setting is recommended.\n+\n \n .. _Spaln: http://www.genome.ist.i.kyoto-u.ac.jp/~aln_user/spaln/\n ]]></help>\n' |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/gnm2tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gnm2tab Thu Jul 16 07:57:10 2020 -0400 |
b |
b'@@ -0,0 +1,688 @@\n+aaosarxi\tDothide2\tAaosphaeria arxii \tF\n+acidrich\tDothide1\tAcidomyces richmondensis \tF\n+acrealca\tSordari1\tAcremonium alcalophilum \tF\n+acyrpisu\tInsectDm\tAcyrthosiphon pisum \tA_I\n+aedeaegy\tInsectDm\tAedes aegypti \tA_I\n+agarbisp\tAgarico1\tAgaricus bisporus \tF\n+agarhyph\tAgarico2\tAgaricostilbum hyphaenes \tF\n+ailumela\tTetrapod\tAiluropoda melanoleuca \tA_M\n+albulaib\talbulaib\tAlbugo laibachii \tO\n+allomacr\tBlastocl\tAllomyces macrogynus \tF\n+altebras\tDothide2\tAlternaria brassicicola \tF\n+amanmusc\tAgaricom\tAmanita muscaria \tF\n+amanthie\tAgarico2\tAmanita thiersii \tF\n+ambotric\tambotric\tAmborella trichopoda \tP\n+amnilign\tDothide2\tAmniculicola lignicola \tF\n+amorresi\tLeotiom1\tAmorphotheca resinae \tF\n+amphquee\tamphquee\tAmphimedon queenslandica \tA_O\n+anasplat\tTetrapod\tAnas platyrhynchos \tA_B\n+anolcaro\tTetrapod\tAnolis carolinensis \tA_R\n+anopdarl\tInsectAg\tAnopheles darlingi \tA_I\n+anopgamb\tInsectAg\tAnopheles gambiae \tA_I\n+anthavoc\tSordari1\tAnthostoma avocetta \tF\n+antolocu\tFungiAll\tAntonospora locustae \tF\n+antrsinu\tAgarico2\tAntrodia sinuosa \tF\n+apiomont\tSordari1\tApiospora montagnei \tF\n+apisflor\tInsectAg\tApis florea \tA_I\n+apismell\tInsectAg\tApis mellifera \tA_I \t\n+aplakerg\tLabyrint\tAplanochytrium kerguelense \tO\n+aploprun\tDothide2\tAplosporella prunicola \tF\n+aplycali\tChordatU\tAplysia californica \tA_mo\n+aquicoer\tEudicoty\tAquilegia coerulea \tP\n+arablyra\tEudicoty\tArabidopsis lyrata \tP\n+arabthal\tEudicoty\tArabidopsis thaliana \tP\n+armimell\tAgaricom\tArmillaria mellea \tF\n+arthbenh\tEurotio1\tArthroderma benhamiae \tF\n+artholig\tSordari1\tArthrobotrys oligospora \tF\n+ascoimme\tPezizomy\tAscobolus immersus \tF\n+ascorube\tSaccharo\tAscoidea rubescens \tF\n+ascosarc\tLeotiom1\tAscocoryne sarcoides \tF\n+ashbgoss\tSaccharo\tAshbya gossypii \tF\n+aspeacid\tEurotio2\tAspergillus acidus \tF\n+aspeacul\tEurotio2\tAspergillus aculeatus 
\tF\n+aspebras\tEurotio2\tAspergillus brasiliensis\tF\n+aspecarb\tEurotio2\tAspergillus carbonarius \tF\n+aspeclav\tEurotio2\tAspergillus clavatus \tF\n+aspeflav\tEurotio2\tAspergillus flavus \tF\n+aspefumi\tEurotio2\tAspergillus fumigatus \tF\n+aspeglau\tEurotio2\tAspergillus glaucus \tF\n+aspekawa\tEurotio2\tAspergillus kawachii \tF\n+aspenidu\tEurotio2\tAspergillus nidulans \tF\n+aspenige\tEurotio2\tAspergillus niger\t \tF\n+aspeoryz\tEurotio2\tAspergillus oryzae\t \tF\n+aspesydo\tEurotio2\tAspergillus sydowii\t \tF\n+aspeterr\tEurotio2\tAspergillus terreus\t \tF\n+aspetubi\tEurotio2\tAspergillus tubingensis\t \tF\n+aspevers\tEurotio2\tAspergillus versicolor\t \tF\n+aspewent\tEurotio2\tAspergillus wentii\t \tF\n+aspezona\tEurotio2\tAspergillus zonatus\t \tF\n+astespec\tastespec\tAsterochloris spec_Cgr/DA1pho \tP\n+astymexi\tFish_lng\tAstyanax mexicanus\t \tA_F\n+atraspec\tatraspec\tAtractiellales spec\t\tF\n+attaceph\tInsectAc\tAtta cephalotes \tA_I\n+aulohede\tDothide2\tAulographum hederae \tF\n+auralima\tLabyrint\tAurantiochytrium limacinum \tO\n+aureanop\tPelagoph\tAureococcus anophagefferens \tO\n+aurepull\tDothide2\tAureobasidium pulm_lulans \tF\n+aurepulm\tDothide2\tAureobasidium pulm_lulans \tF\n+aurepuln\tDothide2\tAureobasidium puln_lulans \tF\n+aurepulp\tDothide2\tAureobasidium pulp_lulans\tF\n+aurepuls\tDothide2\tAureobasidium puls_lulans \tF\n+aurideli\tAgarico1\tAuricularia delicata\t \tF\n+aurisubg\tAgarico1\tAuricularia subglabra\t \tF\n+babjinos\tSaccharo\tBabjeviella inositovor\t \tF\n+backcirc\tMucorale\tBackusella circina\t \tF\n+batrdend\tPuccinia\tBatrachochytrium dendrobatidis\tF\n+baudcomp\tDothide2\tBaudoinia compniacensis \tF\n+beaubass\tSordario\tBeauveria bassiana\t \tF\n+bigenata\tPelagoph\tBigelowiella natans\t \tO\n+bjeradus\tAgarico1\tBjerkandera adusta\t \tF\n+blasderm\tEurotio3\tBlastomyces dermatitidis\tF\n+blumgram\tLeotiomy\tBlumeria graminis '..b'\tTaphrina deformans \tF\n+tarssyri\tTetrapod\tTarsius syrichta 
\tA_M\n+terfboud\tPezizomy\tTerfezia boudieri \tF\n+tetrnigr\tFish_sht\tTetraodon nigroviridis \tA_F\n+tetrphaf\tSaccharo\tTetrapisispora phaffii \tF\n+tetrther\ttetrther\tTetrahymena thermophila \tO\n+tetrurti\tInsectAg\tTetranychus urticae \tA_I\n+thalpseu\tthalpseu\tThalassiosira pseudonana \tO\n+thectrah\tFungiAll\tThecamonas trahens \tO\n+theiannu\tTheileri\tTheileria annulata \tA_O\n+theiparv\tTheileri\tTheileria parva \tA_O\n+thelhalo\tEudicoty\tThellungiella halophila \tP\n+thelparv\tEudicoty\tThellungiella parvula \tP\n+theocaca\tEudicoty\tTheobroma cacao \tP\n+theraura\tEurotio2\tThermoascus aurantiacus \tF\n+thieanta\tFungiAll\tThielavia antarctica \tF\n+thieappe\tSordari1\tThielavia appendiculata \tF\n+thiearen\tSordari1\tThielavia arenaria \tF\n+thiehyrc\tSordari1\tThielavia hyrcaniae \tF\n+thieterr\tSordari1\tThielavia terrestris \tF\n+tillanom\ttillanom\tTilletiaria anomala \tF\n+torudelb\tSaccharo\tTorulaspora delbrueckii \tF\n+toxogond\tConoidas\tToxoplasma gondii \tO\n+tramvers\tAgarico2\tTrametes versicolor \tF\n+tremmese\tTremell2\tTremella mesenterica \tF\n+trempert\ttrempert\tTrematosphaeria pertusa \tF\n+tremspec\tTremell2\tTremella species \tF\n+tribcast\ttribcast\tTribolium castaneum \tA_I\n+tricabie\tAgarico2\tTrichaptum abietinum \tF\n+tricadha\ttricadha\tTrichoplax adhaerens \tA_O\n+tricaspe\tSordari1\tTrichoderma asperellum \tF\n+tricatro\tSordari1\tTrichoderma atroviride \tF\n+triccitr\tSordario\tTrichoderma citrinoviride\tF\n+tricequi\tEurotio2\tTrichophyton equinum \tF\n+tricharz\tSordari1\tTrichoderma harzianum \tF\n+triclong\tSordari1\tTrichoderma longibrachiatum\tF\n+tricmana\tTetrapod\tTrichechus manatus \tA_M\n+tricmats\tAgarico1\tTricholoma matsutake \tF\n+tricolea\tTremell2\tTrichosporon oleaginosus \tF\n+tricrees\tSordari1\tTrichoderma reesei \tF\n+tricrubr\tEurotio1\tTrichophyton rubrum \tF\n+tricspir\ttribcast\tTrichinella spiralis \tA_O\n+trictons\tEurotio1\tTrichophyton tonsurans \tF\n+tricverr\tEurotio1\tTrichophyton 
verrucosum \tF\n+tricvire\tSordari1\tTrichoderma virens \tF\n+tritaesa\tMagnolio\tTriticum aestivum \tP\n+tritaesb\tMagnolio\tTriticum aestivum \tP\n+tritaesd\tMagnolio\tTriticum aestivum \tP\n+tritspec\ttritspec\tTritirachium species \tF\n+trypbruc\tArchamoe\tTrypanosoma brucei \tO\n+trypelut\tDothide2\tTrypethelium eluteriae \tF\n+tubemela\tPezizomy\tTuber melanosporum \tF\n+tulacalo\tAgarico4\tTulasnella calospora \tF\n+tupabela\tTetrapod\tTupaia belangeri \tA_M\n+turstrun\tTetrapod\tTursiops truncatus \tA_M\n+umberama\tMucorale\tUmbelopsis ramanniana \tF\n+uncirees\tEurotio1\tUncinocarpus reesii \tF\n+ustimayd\tEurotio3\tUstilago maydis \tF\n+vertalbo\tSordario\tVerticillium alboatrum \tF\n+vertalfa\tSordario\tVerticillium alfalfae \tF\n+vertdahl\tSordario\tVerticillium dahliae \tF\n+vicupaco\tTetrapod\tVicugna pacos \tA_\n+vitivini\tEudicoty\tVitis vinifera \tP\n+volvcart\tChlospec\tVolvox carteri \tP\n+volvvolv\tAgaricom\tVolvariella volvacea \tF\n+wallicht\tDothide4\tWallemia ichthyophaga \tF\n+wallsebi\twallsebi\tWallemia sebi \tF\n+wickanom\tSaccharo\tWickerhamomyces anomalus \tF\n+wilcmiko\tPezizomy\tWilcoxina mikolae \tF\n+wolfcoco\tAgarico2\tWolfiporia cocos \tF\n+xantpari\tAgarico2\tXanthoria parietina \tF\n+xenotrop\tTetrapod\tXenopus tropicalis \tA_A\n+xiphmacu\tFish_mdl\tXiphophorus maculatus \tA_F\n+xyloheve\txyloheve\tXylona heveae \tF\n+yarrlipo\tSaccharo\tYarrowia lipolytica \tF\n+zasmcell\tDothide2\tZasmidium cellare \tF\n+zea_mays\tMagnolio\tZea mays \tP\n+zonoalbi\tTetrapod\tZonotrichia albicollis \tA_B\n+zopfrhiz\tDothide2\tZopfia rhizophila \tF\n+zygoroux\tSaccharo\tZygosaccharomyces rouxii \tF\n+zymotrit\tDothide2\tZymoseptoria tritici \tF\n' |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/ncbi_taxonomy_sqlite.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ncbi_taxonomy_sqlite.loc Thu Jul 16 07:57:10 2020 -0400 |
b |
@@ -0,0 +1,2 @@ +#value description path +sample sample database ${__HERE__}/sqlite_taxdb |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output1.bed12 --- a/test-data/output1.bed12 Fri Jan 11 18:15:21 2019 -0500 +++ b/test-data/output1.bed12 Thu Jul 16 07:57:10 2020 -0400 |
b |
@@ -1,2 +1,2 @@ track name=Spaln description="NP_001121846.1" useScore=1 -scaffold_1 233 29800 NP_001121846.1 1000 + 233 29800 255,0,0 43 31,30,125,9,9,106,4,9,5,37,8,18,12,18,9,4,30,18,8,3,2,3,15,6,10,7,5,7,14,2,11,20,12,12,2,19,23,26,5,7,9,23,5, 0,71,184,1922,2587,6911,7425,7504,7626,8016,8114,8185,8347,8632,8746,9149,9725,15480,15549,15615,16511,16714,16848,17115,17191,18118,18504,19271,19365,19443,20377,22213,22440,25892,25951,26121,26227,26308,26406,28178,28715,28868,29562 +scaffold_1 16 10418 NP_001121846.1 530 + 16 10418 255,0,0 16 46,33,122,127,33,4,127,33,6,18,15,11,13,24,24,24, 0,111,401,7107,7546,7642,7721,7908,8278,8849,8963,9050,9959,10028,10212,10378 |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output1.tabular --- a/test-data/output1.tabular Fri Jan 11 18:15:21 2019 -0500 +++ b/test-data/output1.tabular Thu Jul 16 07:57:10 2020 -0400 |
b |
b'@@ -1,45 +1,18 @@\n # rID\t gID\t %id\t ExonL\t MisMch\t Unpair\t ref_l\t ref_r\t tgt_l\t tgt_r\t eScore\t IntrnL\t iScore\t Sig3/I\t Sig5/T # - X P DiNuc\n-NP_001121846.1\tscaffold_1\t 30.00\t 31\t 7\t 0\t 1\t 10\t 234\t 264\t 71.9\t 0\t 0.0\t 67.90\t -1.80 0 0 0 0 . \n-NP_001121846.1\tscaffold_1\t 16.67\t 30\t 6\t 9\t 11\t 27\t 305\t 334\t -9.0\t 40\t -8.2\t 9.90\t -2.20 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 64.29\t 125\t 15\t 0\t 28\t 69\t 418\t 542\t 226.4\t 83\t 8.4\t 22.20\t 16.00 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 66.67\t 9\t 1\t 0\t 70\t 72\t 2156\t 2164\t 37.9\t 1613\t -6.6\t 4.40\t 21.90 0 0 0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 66.67\t 9\t 1\t 0\t 73\t 75\t 2821\t 2829\t 32.3\t 656\t 7.5\t 7.20\t 12.10 0 0 0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 62.86\t 106\t 13\t 0\t 76\t 110\t 7145\t 7250\t 176.6\t 4315\t 4.0\t 25.10\t 25.30 0 0 0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 4\t 2\t 0\t 111\t 112\t 7659\t 7662\t 22.9\t 408\t 11.8\t 5.70\t 17.50 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 9\t 2\t 1\t 113\t 114\t 7738\t 7746\t 4.9\t 75\t 20.0\t 13.50\t 2.90 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 5\t 1\t 0\t 115\t 115\t 7860\t 7864\t 24.0\t 113\t -3.9\t 7.30\t 16.20 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 30.00\t 37\t 7\t 0\t 116\t 125\t 8250\t 8286\t 15.8\t 385\t 9.7\t 12.50\t 11.50 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 100.00\t 8\t 0\t 0\t 126\t 127\t 8348\t 8355\t 28.3\t 61\t 18.0\t 16.20\t -0.80 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 33.33\t 18\t 4\t 0\t 128\t 133\t 8419\t 8436\t 26.2\t 63\t 1.5\t 12.10\t 6.70 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 12\t 4\t 0\t 134\t 137\t 8581\t 8592\t 29.2\t 144\t 9.9\t 18.20\t 9.20 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 18\t 6\t 0\t 138\t 143\t 8866\t 8883\t 29.1\t 273\t 5.7\t 14.00\t 24.90 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 9\t 3\t 0\t 144\t 146\t 8980\t 8988\t 28.2\t 96\t 33.9\t 21.70\t 10.20 0 0 0 1 
GT.AG\n-NP_001121846.1\tscaffold_1\t 50.00\t 4\t 1\t 0\t 147\t 148\t 9383\t 9386\t 33.7\t 394\t -5.5\t 3.40\t 14.10 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 10.00\t 30\t 5\t 4\t 149\t 154\t 9959\t 9988\t 21.1\t 572\t 3.1\t 10.30\t 20.60 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 16.67\t 18\t 5\t 0\t 155\t 160\t 15714\t 15731\t 43.6\t 5725\t 3.9\t 18.20\t 22.40 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 50.00\t 8\t 1\t 0\t 161\t 162\t 15783\t 15790\t 19.0\t 51\t 8.8\t -3.20\t 17.10 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 3\t 1\t 0\t 163\t 163\t 15849\t 15851\t 17.0\t 58\t 20.8\t 13.40\t 3.40 0 0 0 1 GC.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 2\t 1\t 0\t 164\t 164\t 16745\t 16746\t 20.9\t 893\t -5.3\t 14.60\t 8.20 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 3\t 1\t 0\t 165\t 165\t 16948\t 16950\t 27.2\t 201\t 14.2\t 22.30\t 9.10 0 0 0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 100.00\t 15\t 0\t 0\t 166\t 166\t 17082\t 17096\t 13.3\t 131\t 17.9\t 23.40\t 2.50 0 0 0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 6\t 1\t 1\t 167\t 167\t 17349\t 17354\t 28.3\t 252\t -9.3\t 5.30\t 22.50 0 0 0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 33.33\t 10\t 2\t 0\t 168\t 170\t 17425\t 17434\t 15.0\t 70\t 22.3\t 10.10\t 2.30 0 0 0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 33.33\t 7\t 2\t 0\t 171\t 173\t 18352\t 18358\t 24.7\t 917\t -18.2\t 2.90\t 16.40 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t '..b'197\t 26126\t 26137\t 31.8\t 3440\t -1.3\t 22.70\t 14.00 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 2\t 1\t 0\t 198\t 198\t 26185\t 26186\t 22.5\t 47\t 14.8\t 12.40\t 8.30 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 33.33\t 19\t 4\t 0\t 199\t 204\t 26355\t 26373\t 33.1\t 168\t 7.8\t 15.10\t 7.10 0 0 0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 23\t 3\t 0\t 205\t 207\t 26461\t 26483\t 8.0\t 87\t 11.9\t 16.80\t 8.10 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 33.33\t 26\t 2\t 4\t 208\t 212\t 26542\t 26567\t 22.6\t 58\t 18.9\t 20.50\t -0.50 0 0 0 0 
GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 5\t 1\t 0\t 213\t 213\t 26640\t 26644\t 24.6\t 72\t 3.9\t 14.90\t 11.30 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 33.33\t 7\t 2\t 0\t 214\t 216\t 28412\t 28418\t 28.0\t 1767\t -4.5\t 11.80\t 12.30 0 0 0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 100.00\t 9\t 0\t 0\t 217\t 217\t 28949\t 28957\t 26.9\t 530\t 17.1\t 25.30\t 10.30 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 28.57\t 23\t 2\t 3\t 218\t 221\t 29102\t 29124\t 36.1\t 144\t 16.5\t 21.20\t 10.60 0 0 0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t 0.00\t 5\t 1\t 1\t 222\t 222\t 29796\t 29800\t 24.4\t 671\t 8.0\t 19.10\t 10.60 0 0 0 1 GT.AG\n-@ scaffold_1 + ( 234 29800 ) NP_001121846.1 222 ( 1 222 ) S: 1357.3 =: 38.1 C: 110.8 T#: 122 T-: 26 B#: 0 B-: 0 X: 0 Pam: 0\n+NP_001121846.1\tscaffold_1\t 21.43\t 46\t 11\t 0\t 1\t 14\t 17\t 62\t 79.7\t 0\t 0.0\t 66.40\t 0.90 0 0 0 0 . \n+NP_001121846.1\tscaffold_1\t 12.50\t 33\t 8\t 3\t 15\t 27\t 128\t 160\t -4.3\t 65\t -23.9\t -5.20\t 10.70 0 0 0 1 GT.AG\n+NP_001121846.1\tscaffold_1\t 68.29\t 122\t 13\t 0\t 28\t 68\t 418\t 539\t 210.5\t 257\t -0.1\t 16.10\t 6.60 0 0 0 1 GT.AG\n+NP_001121846.1\tscaffold_1\t 64.29\t 127\t 15\t 0\t 69\t 110\t 7124\t 7250\t 187.0\t 6584\t -24.3\t 14.60\t 17.80 0 0 0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t 9.09\t 33\t 10\t 0\t 111\t 121\t 7563\t 7595\t 11.0\t 312\t -4.2\t 5.70\t 5.20 0 0 0 1 GT.AG\n+NP_001121846.1\tscaffold_1\t 0.00\t 4\t 2\t 0\t 122\t 123\t 7659\t 7662\t 21.2\t 63\t -6.4\t 7.90\t 13.90 0 0 0 1 GT.AG\n+NP_001121846.1\tscaffold_1\t 20.45\t 127\t 31\t 2\t 124\t 165\t 7738\t 7864\t 21.7\t 75\t 5.0\t 11.80\t 13.20 0 0 0 2 GT.AG\n+NP_001121846.1\tscaffold_1\t 9.09\t 33\t 10\t 0\t 166\t 176\t 7925\t 7957\t 26.1\t 60\t 1.0\t 7.20\t 9.70 0 0 0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t 50.00\t 6\t 1\t 0\t 177\t 178\t 8295\t 8300\t 29.4\t 337\t -9.5\t 8.90\t 10.80 0 0 0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t 16.67\t 18\t 5\t 0\t 179\t 184\t 8866\t 8883\t 31.0\t 565\t -7.7\t 12.00\t 17.60 0 0 0 0 
GT.AG\n+NP_001121846.1\tscaffold_1\t 0.00\t 15\t 5\t 0\t 185\t 189\t 8980\t 8994\t 30.3\t 96\t 11.1\t 15.90\t 9.10 0 0 0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t 0.00\t 11\t 4\t 0\t 190\t 193\t 9067\t 9077\t 27.0\t 72\t -4.0\t 7.10\t 7.60 0 0 0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t 25.00\t 13\t 3\t 0\t 194\t 197\t 9976\t 9988\t 28.5\t 898\t -22.1\t 6.80\t 15.40 0 0 0 2 GT.AG\n+NP_001121846.1\tscaffold_1\t 37.50\t 24\t 5\t 0\t 198\t 205\t 10045\t 10068\t 27.5\t 56\t 3.7\t 7.70\t 5.10 0 0 0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t 25.00\t 24\t 6\t 0\t 206\t 213\t 10229\t 10252\t 38.3\t 160\t -6.9\t 13.10\t 12.60 0 0 0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t 20.00\t 24\t 6\t 1\t 214\t 222\t 10395\t 10418\t 23.3\t 142\t 0.3\t 12.40\t 12.60 0 0 0 0 GT.AG\n+@ scaffold_1 + ( 17 10418 ) NP_001121846.1 222 ( 1 222 ) S: 627.0 =: 36.5 C: 97.3 T#: 135 T-: 6 B#: 0 B-: 0 X: 0 Nexn: 16\n' |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output1_gff_genes.gff3 --- a/test-data/output1_gff_genes.gff3 Fri Jan 11 18:15:21 2019 -0500 +++ b/test-data/output1_gff_genes.gff3 Thu Jul 16 07:57:10 2020 -0400 |
b |
@@ -1,47 +1,20 @@ ##gff-version 3 ##sequence-region scaffold_1 1 59940 -scaffold_1 ALN gene 234 29800 1357 + . ID=gene00001;Name=scaffold_1_15 -scaffold_1 ALN mRNA 234 29800 1357 + . ID=mRNA00001;Parent=gene00001;Name=scaffold_1_15 -scaffold_1 ALN cds 234 264 71 + 0 ID=cds00001;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 1 10 + -scaffold_1 ALN cds 305 334 -9 + 2 ID=cds00002;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 11 27 + -scaffold_1 ALN cds 418 542 226 + 2 ID=cds00003;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 28 69 + -scaffold_1 ALN cds 2156 2164 37 + 0 ID=cds00004;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 70 72 + -scaffold_1 ALN cds 2821 2829 32 + 0 ID=cds00005;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 73 75 + -scaffold_1 ALN cds 7145 7250 176 + 0 ID=cds00006;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 76 110 + -scaffold_1 ALN cds 7659 7662 22 + 2 ID=cds00007;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 111 112 + -scaffold_1 ALN cds 7738 7746 4 + 1 ID=cds00008;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 113 114 + -scaffold_1 ALN cds 7860 7864 24 + 1 ID=cds00009;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 115 115 + -scaffold_1 ALN cds 8250 8286 15 + 2 ID=cds00010;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 116 125 + -scaffold_1 ALN cds 8348 8355 28 + 1 ID=cds00011;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 126 127 + -scaffold_1 ALN cds 8419 8436 26 + 2 ID=cds00012;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 128 133 + -scaffold_1 ALN cds 8581 8592 29 + 2 ID=cds00013;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 134 137 + -scaffold_1 ALN cds 8866 8883 29 + 2 ID=cds00014;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 138 143 + -scaffold_1 ALN cds 8980 8988 28 + 2 ID=cds00015;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 144 146 + -scaffold_1 ALN cds 9383 9386 
33 + 2 ID=cds00016;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 147 148 + -scaffold_1 ALN cds 9959 9988 21 + 1 ID=cds00017;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 149 154 + -scaffold_1 ALN cds 15714 15731 43 + 1 ID=cds00018;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 155 160 + -scaffold_1 ALN cds 15783 15790 19 + 1 ID=cds00019;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 161 162 + -scaffold_1 ALN cds 15849 15851 17 + 2 ID=cds00020;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 163 163 + -scaffold_1 ALN cds 16745 16746 20 + 2 ID=cds00021;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 164 164 + -scaffold_1 ALN cds 16948 16950 27 + 0 ID=cds00022;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 165 165 + -scaffold_1 ALN cds 17082 17096 13 + 0 ID=cds00023;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 166 166 + -scaffold_1 ALN cds 17349 17354 28 + 0 ID=cds00024;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 167 167 + -scaffold_1 ALN cds 17425 17434 15 + 0 ID=cds00025;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 168 170 + -scaffold_1 ALN cds 18352 18358 24 + 2 ID=cds00026;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 171 173 + -scaffold_1 ALN cds 18738 18742 17 + 1 ID=cds00027;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 174 174 + -scaffold_1 ALN cds 19505 19511 32 + 2 ID=cds00028;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 175 177 + -scaffold_1 ALN cds 19599 19612 16 + 1 ID=cds00029;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 178 181 + -scaffold_1 ALN cds 19677 19678 17 + 2 ID=cds00030;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 182 182 + -scaffold_1 ALN cds 20611 20621 30 + 0 ID=cds00031;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 183 184 + -scaffold_1 ALN cds 22447 22466 27 + 1 ID=cds00032;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 185 188 + 
-scaffold_1 ALN cds 22674 22685 31 + 2 ID=cds00033;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 189 192 + -scaffold_1 ALN cds 26126 26137 31 + 2 ID=cds00034;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 193 197 + -scaffold_1 ALN cds 26185 26186 22 + 2 ID=cds00035;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 198 198 + -scaffold_1 ALN cds 26355 26373 33 + 0 ID=cds00036;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 199 204 + -scaffold_1 ALN cds 26461 26483 8 + 2 ID=cds00037;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 205 207 + -scaffold_1 ALN cds 26542 26567 22 + 0 ID=cds00038;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 208 212 + -scaffold_1 ALN cds 26640 26644 24 + 1 ID=cds00039;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 213 213 + -scaffold_1 ALN cds 28412 28418 28 + 2 ID=cds00040;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 214 216 + -scaffold_1 ALN cds 28949 28957 26 + 1 ID=cds00041;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 217 217 + -scaffold_1 ALN cds 29102 29124 36 + 1 ID=cds00042;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 218 221 + -scaffold_1 ALN cds 29796 29800 24 + 2 ID=cds00043;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 222 222 + +scaffold_1 ALN gene 17 10418 627 + . ID=gene00001;Name=scaffold_1_5 +scaffold_1 ALN mRNA 17 10418 627 + . 
ID=mRNA00001;Parent=gene00001;Name=scaffold_1_5 +scaffold_1 ALN cds 17 62 79 + 0 ID=cds00001;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 1 14 + +scaffold_1 ALN cds 128 160 -4 + 2 ID=cds00002;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 15 27 + +scaffold_1 ALN cds 418 539 210 + 2 ID=cds00003;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 28 68 + +scaffold_1 ALN cds 7124 7250 187 + 0 ID=cds00004;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 69 110 + +scaffold_1 ALN cds 7563 7595 11 + 2 ID=cds00005;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 111 121 + +scaffold_1 ALN cds 7659 7662 21 + 2 ID=cds00006;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 122 123 + +scaffold_1 ALN cds 7738 7864 21 + 1 ID=cds00007;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 124 165 + +scaffold_1 ALN cds 7925 7957 26 + 0 ID=cds00008;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 166 176 + +scaffold_1 ALN cds 8295 8300 29 + 0 ID=cds00009;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 177 178 + +scaffold_1 ALN cds 8866 8883 31 + 0 ID=cds00010;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 179 184 + +scaffold_1 ALN cds 8980 8994 30 + 0 ID=cds00011;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 185 189 + +scaffold_1 ALN cds 9067 9077 27 + 0 ID=cds00012;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 190 193 + +scaffold_1 ALN cds 9976 9988 28 + 1 ID=cds00013;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 194 197 + +scaffold_1 ALN cds 10045 10068 27 + 0 ID=cds00014;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 198 205 + +scaffold_1 ALN cds 10229 10252 38 + 0 ID=cds00015;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 206 213 + +scaffold_1 ALN cds 10395 10418 23 + 0 ID=cds00016;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 214 222 + |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output1_gff_matches.gff3 --- a/test-data/output1_gff_matches.gff3 Fri Jan 11 18:15:21 2019 -0500 +++ b/test-data/output1_gff_matches.gff3 Thu Jul 16 07:57:10 2020 -0400 |
b |
b'@@ -1,45 +1,18 @@\n ##gff-version\t3\n ##sequence-region\tscaffold_1 1 59940\n-scaffold_1\tALN\tnucleotide_to_protein_match\t234\t264\t71\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 1 10 +;Gap=M10 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t305\t334\t-9\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 11 27 +;Gap=M2 D1 M5 I8 M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t418\t542\t226\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 28 69 +;Gap=M42 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t2156\t2164\t37\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 70 72 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t2821\t2829\t32\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 73 75 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t7145\t7250\t176\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 76 110 +;Gap=M35 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t7659\t7662\t22\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 111 112 +;Gap=M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t7738\t7746\t4\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 113 114 +;Gap=D1 M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t7860\t7864\t24\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 115 115 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8250\t8286\t15\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 116 125 +;Gap=M10 D3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8348\t8355\t28\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 126 127 +;Gap=M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8419\t8436\t26\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 128 133 +;Gap=M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8581\t8592\t29\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 134 137 +;Gap=M4 
\n-scaffold_1\tALN\tnucleotide_to_protein_match\t8866\t8883\t29\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 138 143 +;Gap=M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8980\t8988\t28\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 144 146 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t9383\t9386\t33\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 147 148 +;Gap=M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t9959\t9988\t21\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 149 154 +;Gap=D4 M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t15714\t15731\t43\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 155 160 +;Gap=M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t15783\t15790\t19\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 161 162 +;Gap=M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t15849\t15851\t17\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 163 163 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t16745\t16746\t20\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 164 164 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t16948\t16950\t27\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 165 165 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t17082\t17096\t13\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 166 166 +;Gap=M1 D4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t17349\t17354\t28\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 167 167 +;Gap=D1 M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t17425\t17434\t15\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 168 170 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t18352\t18358\t24\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 171 173 +;Gap=M3 
\n-scaffold_1\tALN\tnucleotide_to_protein_match\t18738\t18742\t17\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 174 174 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t19505\t19511\t32\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 175 177 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t19599\t19612\t16\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 178 181 +;Gap=M4 \n-s'..b'1_15;Target=NP_001121846.1 182 182 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t20611\t20621\t30\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 183 184 +;Gap=M2 D2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t22447\t22466\t27\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 185 188 +;Gap=D2 M4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t22674\t22685\t31\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 189 192 +;Gap=M4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26126\t26137\t31\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 193 197 +;Gap=I1 M4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26185\t26186\t22\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 198 198 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26355\t26373\t33\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 199 204 +;Gap=M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26461\t26483\t8\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 205 207 +;Gap=M3 D5 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26542\t26567\t22\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 208 212 +;Gap=D4 M5 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26640\t26644\t24\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 213 213 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t28412\t28418\t28\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 214 216 +;Gap=M3 
\n-scaffold_1\tALN\tnucleotide_to_protein_match\t28949\t28957\t26\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 217 217 +;Gap=M1 D2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t29102\t29124\t36\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 218 221 +;Gap=D3 M4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t29796\t29800\t24\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 222 222 +;Gap=M1 D1 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t17\t62\t79\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 1 14 +;Gap=D1 M14 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t128\t160\t-4\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 15 27 +;Gap=M5 I3 M1 D1 M4 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t418\t539\t210\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 28 68 +;Gap=M41 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7124\t7250\t187\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 69 110 +;Gap=M42 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7563\t7595\t11\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 111 121 +;Gap=M11 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7659\t7662\t21\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 122 123 +;Gap=M2 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7738\t7864\t21\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 124 165 +;Gap=M14 D1 M7 I2 M19 D1 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7925\t7957\t26\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 166 176 +;Gap=M11 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t8295\t8300\t29\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 177 178 +;Gap=M2 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t8866\t8883\t31\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 179 184 +;Gap=M6 
\n+scaffold_1\tALN\tnucleotide_to_protein_match\t8980\t8994\t30\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 185 189 +;Gap=M5 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t9067\t9077\t27\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 190 193 +;Gap=M4 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t9976\t9988\t28\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 194 197 +;Gap=M4 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t10045\t10068\t27\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 198 205 +;Gap=M8 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t10229\t10252\t38\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 206 213 +;Gap=M8 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t10395\t10418\t23\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 214 222 +;Gap=M5 I1 M3 \n' |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output2.tabular Thu Jul 16 07:57:10 2020 -0400 |
b |
@@ -0,0 +1,7 @@ +# rID gID %id ExonL MisMch Unpair ref_l ref_r tgt_l tgt_r eScore IntrnL iScore Sig3/I Sig5/T # - X P DiNuc +NP_001121846.1 scaffold_1 11.11 91 24 0 1 27 17 107 54.8 0 0.0 66.40 5.40 0 0 0 0 . +NP_001121846.1 scaffold_1 65.85 122 14 0 28 68 418 539 210.7 310 -20.3 17.40 8.60 0 0 0 1 GT.AG +NP_001121846.1 scaffold_1 64.29 127 15 0 69 110 7124 7250 188.0 6584 -33.9 16.50 16.90 0 0 0 0 GT.AG +NP_001121846.1 scaffold_1 28.57 147 30 0 111 152 44187 44333 28.7 36936 -44.6 10.00 9.60 0 0 0 1 GT.AG +NP_001121846.1 scaffold_1 20.55 221 52 3 153 222 44595 44815 23.4 261 -18.9 14.00 9.60 0 0 0 1 GT.AG +@ scaffold_1 + ( 17 44815 ) NP_001121846.1 222 ( 1 222 ) S: 349.5 =: 37.8 C: 98.6 T#: 135 T-: 3 B#: 0 B-: 0 X: 0 Nexn: 5 |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/sqlite_taxdb/tax.ncbitaxonomy.sqlite |
b |
Binary file test-data/sqlite_taxdb/tax.ncbitaxonomy.sqlite has changed |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 tool-data/ncbi_taxonomy_sqlite.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ncbi_taxonomy_sqlite.loc.sample Thu Jul 16 07:57:10 2020 -0400 |
b |
@@ -0,0 +1,2 @@ +#This file describes the storage location for the NCBI Taxonomy SQLite database file +#<unique_name> <description> <file_path> \ No newline at end of file |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Jul 16 07:57:10 2020 -0400 |
b |
@@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of SQLite format NCBI Taxonomy database --> + <table name="ncbi_taxonomy_sqlite" comment_char="#"> + <columns>value, description, path</columns> + <file path="tool-data/ncbi_taxonomy_sqlite.loc" /> + </table> +</tables> |
b |
diff -r 95ea8d97abb4 -r 37b5e1f0b544 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Jul 16 07:57:10 2020 -0400 |
b |
@@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of SQLite format NCBI Taxonomy database --> + <table name="ncbi_taxonomy_sqlite" comment_char="#"> + <columns>value, description, path</columns> + <file path="${__HERE__}/test-data/ncbi_taxonomy_sqlite.loc" /> + </table> +</tables> |