Repository 'isescan'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/isescan

Changeset 0:debe9c750cdd (2022-09-01)
Next changeset 1:19f42b3ea391 (2022-09-01)
Commit message:
planemo upload for repository https://github.com/pimarin/tools-iuc/tree/isescan commit d9d88c043b2d45877e3b0bffba6e709c9a0f0379
added:
isescan.xml
macro.xml
test-data/NC_012624.fna
test-data/TEST_1/test_1.fna.gff
test-data/TEST_1/test_1.fna.is.fna
test-data/TEST_1/test_1.fna.orf.faa
test-data/TEST_1/test_1.fna.orf.fna
test-data/TEST_1/test_1.fna.sum
test-data/TEST_1/test_1.fna.tsv
test-data/TEST_1/test_1.log
test-data/test_2.fna.gff
test-data/test_2.fna.is.fna
test-data/test_2.fna.orf.faa
test-data/test_2.fna.orf.fna
test-data/test_2.fna.sum
test-data/test_2.fna.tsv
test-data/test_2.log
b
diff -r 000000000000 -r debe9c750cdd isescan.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isescan.xml Thu Sep 01 09:29:56 2022 +0000
[
@@ -0,0 +1,101 @@
+<tool id="isescan" name="ISEScan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description> Insertion Sequence Elements detection in prokaryotic genomes </description>
+    <!-- Version tokens and the xrefs / requirements / version_command / citations blocks are defined in macro.xml. -->
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <!--
+        The input dataset is symlinked to the fixed name "input" so that the result
+        files land at the predictable paths results/input.* collected by the outputs below.
+        When the log is kept, stdout is piped through "tee" into the logfile dataset;
+        pipefail keeps an isescan.py failure visible to detect_errors instead of tee's exit status.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        set -o pipefail;
+        ln -s '$input_file' input &&
+        isescan.py
+        --seqfile input
+        --output 'results'
+        $remove_short_is
+        --nthread \${GALAXY_SLOTS:-7}
+        #if $log_activate
+            | tee '$logfile'
+        #end if
+    ]]>
+    </command>
+    <inputs>
+        <param name="input_file" type="data" format="fasta" label="Genome fasta input" help="Fasta sequence to analyse for IS"/>
+        <param name="remove_short_is" argument="--removeShortIS" type="boolean" truevalue="--removeShortIS" falsevalue=""
+               label="Remove incomplete IS elements"
+               help="Remove partial IS elements which include IS element with length &lt; 400 or single copy IS element without perfect TIR"/>
+        <param name="log_activate" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Keep the log file"/>
+    </inputs>
+    <outputs>
+        <!-- Written directly by "tee" in the command, hence no from_work_dir; only present when the log is kept. -->
+        <data format="txt" name="logfile" label="${tool.name} on ${on_string}: log">
+            <filter> log_activate == True </filter>
+        </data>
+        <data format="tabular" name="summary" from_work_dir="results/input.sum" label="${tool.name} on ${on_string}: summary"/>
+        <data format="tabular" name="all_results" from_work_dir="results/input.tsv" label="${tool.name} on ${on_string}: IS table"/>
+        <data format="gff" name="annotation" from_work_dir="results/input.gff" label="${tool.name} on ${on_string}: annotation"/>
+        <data format="fasta" name="is_fasta" from_work_dir="results/input.is.fna" label="${tool.name} on ${on_string}: IS sequences"/>
+        <data format="fasta" name="orf_fna" from_work_dir="results/input.orf.fna" label="${tool.name} on ${on_string}: ORF nucleotide sequences"/>
+        <data format="fasta" name="orf_faa" from_work_dir="results/input.orf.faa" label="${tool.name} on ${on_string}: ORF protein sequences"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="7"> <!-- TEST_1 default parameters -->
+            <param name="input_file" value="NC_012624.fna"/>
+            <output name="logfile" value="TEST_1/test_1.log" lines_diff="70"/>
+            <output name="summary" value="TEST_1/test_1.fna.sum" lines_diff="2"/>
+            <output name="all_results" value="TEST_1/test_1.fna.tsv" lines_diff="1"/>
+            <output name="annotation" value="TEST_1/test_1.fna.gff" lines_diff="1"/>
+            <output name="is_fasta" value="TEST_1/test_1.fna.is.fna" lines_diff="1"/>
+            <output name="orf_fna" value="TEST_1/test_1.fna.orf.fna" lines_diff="1"/>
+            <output name="orf_faa" value="TEST_1/test_1.fna.orf.faa" lines_diff="1"/>
+        </test>
+        <test expect_num_outputs="7"> <!-- TEST_2 remove short IS -->
+            <param name="input_file" value="NC_012624.fna"/>
+            <param name="remove_short_is" value="true"/>
+            <output name="logfile" value="test_2.log" lines_diff="50"/>
+            <output name="summary" value="test_2.fna.sum" lines_diff="1"/>
+            <output name="all_results" value="test_2.fna.tsv" lines_diff="1"/>
+            <output name="annotation" value="test_2.fna.gff" lines_diff="1"/>
+            <output name="is_fasta" value="test_2.fna.is.fna" lines_diff="1"/>
+            <output name="orf_fna" value="test_2.fna.orf.fna" lines_diff="1"/>
+            <output name="orf_faa" value="test_2.fna.orf.faa" lines_diff="1"/>
+        </test>
+        <test expect_num_outputs="6"> <!-- TEST_3 remove short IS and no logfile -->
+            <param name="input_file" value="NC_012624.fna"/>
+            <param name="remove_short_is" value="true"/>
+            <param name="log_activate" value="False"/>
+            <output name="summary" value="test_2.fna.sum" lines_diff="1"/>
+            <output name="all_results" value="test_2.fna.tsv" lines_diff="1"/>
+            <output name="annotation" value="test_2.fna.gff" lines_diff="1"/>
+            <output name="is_fasta" value="test_2.fna.is.fna" lines_diff="1"/>
+            <output name="orf_fna" value="test_2.fna.orf.fna" lines_diff="1"/>
+            <output name="orf_faa" value="test_2.fna.orf.faa" lines_diff="1"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+      **What it does**
+
+      ISEScan is a Python pipeline to identify Insertion Sequence (IS) elements, both complete and incomplete, in a genome.
+      To report only complete IS elements, enable the *Remove incomplete IS elements* option (``--removeShortIS``).
+
+      **Input data**
+
+      ISEScan accepts a genome sequence in FASTA format as input.
+
+      **Parameters**
+
+      *Remove incomplete IS elements* removes the partial IS elements, i.e. IS elements with length < 400 or single copy IS elements without perfect TIR.
+
+      *Keep the log file* adds the ISEScan log as an additional output.
+
+        ]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r debe9c750cdd macro.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macro.xml Thu Sep 01 09:29:56 2022 +0000
[
@@ -0,0 +1,28 @@
+<macros>
+    <!-- Tokens substituted into the tool's version and profile attributes. -->
+    <token name="@TOOL_VERSION@">1.7.2.3</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <!-- Package dependency, pinned to the same version as the tool. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">isescan</requirement>
+        </requirements>
+    </xml>
+    <!-- Command used to report the installed ISEScan version. -->
+    <xml name="version_command">
+        <version_command><![CDATA[isescan.py --version]]> </version_command>
+    </xml>
+    <!-- Cross-reference to the bio.tools registry. -->
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">ISEScan</xref>
+        </xrefs>
+    </xml>
+    <!-- Publication to cite, given as a DOI. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btx433</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r debe9c750cdd test-data/NC_012624.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NC_012624.fna Thu Sep 01 09:29:56 2022 +0000
b
b'@@ -0,0 +1,605 @@\n+>gi|228288719|ref|NC_012624.1| Sulfolobus islandicus Y.N.15.51 plasmid pYN01, complete sequence\n+TTTTTAAAAACCCTGCATAGTGATGATCTCCAAAAAAGAGTTTTTACCTATTCTTGCATACTCTATTCCA\n+ACATATGTACTAGTGTATCCATCATTTTTTGTTAACACTCTAGCAAGGGAATTCCCATACTGGGAGGCAT\n+TCCTACTACTCTCCCTACCCTTTCTAGGGAGAGTAGTAGGATCCCTAATTTACCAATTCTTCAAATCCTA\n+CATCATACCGCTCCTAACCCTTAGCATACTCACGCTCCTACAAATCGATTTAAACATAATATTTCCCGTC\n+AGATTCTTGATCGGCATAATCTTCGGACTAATGACCAGCTACGCAGTTGACAATGCTGTTAAAACGAACA\n+ACTTAGTGTTAGGTTTAACCACAGCGGGCTGGAGTATAGGATGGATACTATCATATATAGCATATACAAC\n+AATCCATAATTGGAATCAAATATGCATAATCAGCAGCATAATAATAGTATCCATAGCACTCCTAGACAGA\n+AGAGTAAGCCTAGATAAAATCAAAATCAATATAAGCTTACCAAAAACGTCATCCATAATAGTCTACTTCT\n+CAGCGTTAACCCCGGCATTCACACTACAAATAATTCCGAGCATATTCGAAAAAGCACACACAACATGGCT\n+AATACTCCCTTCGTATCTACTTTCCATAGCTGCGTATACTATACTACCAATAATAGCAAGCAAAATAGGA\n+CTAAAGAGATGCATCATAATAACAACCCTAGGAATACTAATTAGTGGAATAACAACATTCCTAATAACAC\n+CCTACACCCTATTACCATATACCTCACTAGGACTAGGAATACTTTCACTAATACCGAAATACTTAACAAC\n+GAAAAACGAAAAAGCCAACACTCTGGGAATAGCCCTAAATATAGGATCAATAGGAGGACTAATAATACCA\n+ACGCTATACACAATAATACCGACAAGCCCAGAATCAATACTCATAATAACTTCAATAATATTGTTAACAA\n+TCTAAAAAATTGTAATGAGGGTTTTTAAAAACCCCAAAAGTGAAGGTGGTGAAATGATTTCGCCTGACTT\n+AGCAATAAAAATTCTTCTATTAGTTCCTTCAGTAATATTCTTCTTCTACAGTGCAGTATATTTAATGTTA\n+TTTGAGCTAAATGTACAACCTAAACTGAGCAAATTCTACAGAAATACTAGTCTAGTTTTAGCTGGAGGTG\n+GTATACTATTATTGACAATTTATTTAATGATTTAGATAGCATAGATGATATAATTTTAATAATATTGGCA\n+AATGCTAACGGAACTGTTAAAGAAATGGCAGAGAAAACGGGAATAAAAGAAGAAGCTGTATATCATCTTT\n+TAGAATTTCTAACAATAGCTGGGATTGTGAAGAAAGAGAATGATAGATATTCTATAGATAAAACAATGAG\n+AACTATAGCTCAACTTTTGATAGATTTTAAAGACGGAGATGACGTGAATTGAAGAAAATAGTAATAAAAA\n+CAGAACTTCCTTATAATTATAGAAAGCTTAAGGGATTTTTCATTTCACCTGGGGTGCTCAGATTATTTTA\n+TGACGTAAAGAGTGTAGAAGTTGCAGGGAATTTATCATATATTATTAACAACAAGTACAATGCAATAATG\n+TATTTCAGTAATGTTGATATTGTATGGGAGATTTATAATGGTAGATTAAAGGATAAGATCTCAGTATGGT\n+TATATCCAATTGGTGATAATACAGGATTACACTTGAAATTCGAAACTAATAGAATACTCCCGCTAAAGAA\n+ACCTCTAGAAAATGAAGTCAAAGCAGGAGTAGAATTGCTAAAAAGTTTGTTAGAT
GCTTTGAGGAGGTTC\n+TGATGAAGTTAAGAACCCATTACATCTTCTCAACCGGTTTATTGACGCTTCTGGACTCTGTACTCTTTCA\n+TGAATATTTTTACTACGCTTTAATCTTGAGCGGAATAGTTTCGGTAATAGGTAATTCCTTGATTGATAGG\n+ATTGGCCATAAGGAGATTGCCACTAGGTATGGGTATATCCCGGTAAGGACACCTTTAACCCATACAATTC\n+CTAGAAGTGTAGTTTGGGGTATTGTCTCCGTAGTCCCGGTCTTTATTCTCTTATTGATTTATTATTATGG\n+GTTTAGCTATCACGAGTACTATTTCTCCCTTAGCAATAAGGTGGTGTTGTTAATATTGTTAAATGGTGTA\n+GTTGTTGGGCCCTCTCATTTGTTCCTGGACGTATTTACGGAAAGGGGAATATATCATAAAGTAAATGGTA\n+AGTGGAGGAGATTCGCATTGGCACACTTCTCCTATGATAACCCCTTAGTAAATGGATTAGCAATCTTACT\n+AGGAATTATAATGCTGCTAGCAGCACTTTATCTACATAACTACCATTATTACAACTATTATTTCTAACGA\n+GATAATAATAGTATAAAGATAAAAACAAAAAAGTTTTTAAATCTGTTTTTACAGAAGTAGAAATGGGGGA\n+TGAGGGGATGATGAGTAGGGTCAGAATTGACCTATGATAGGACACTCGGCAGCTGACCTGACCCCCTCAT\n+CCCTAATTTATTCTTACCTTATCTAACTATTAAGCTTTTCTTCTCATTGAATTTTGAAAAATTGAGCAAA\n+GAATGAGGTTGGATCCTAACGTTAAATTTACGACGTGAGAAAAATTCAATAAACTCCCCAGTTTGCTAAG\n+AAAAATTCTTTCTTCTAATTTCTTAATCTCATCTAATTTTGTATTATTAGCTAATAATGTACAATAAATG\n+CAGCTGAGGAGTGATATGATGCAACATAGATACGCTAAACATTCTCTATAATAAACTGAAATATGATAAT\n+GTATTCGAATTGAATTAAGCATCTTTATAATTAGGCAAATTTGTATAACACAAATATAAGTGTGCGTAAT\n+TATTGCAGAAATAGTTATCAAGACACTTGTTAAAATAAAAGAAATGATGAGAAGAGTTTTATTTCTCATA\n+ATATTATAAATTTTATTTAAAAAGCTCAGTTGAATTAACTCCAGTTTACTCTACAAAAATTCAAAGTAGT\n+TAATCCTGCACGGATCTCAAATTTTTTAGTTAAAAGAGCAAGGTAATGCGATAATGACCATAAATACTAC\n+ACTTTGTTAACCCTACTATTCCCAGTTTGGGAACTCATTTTCAGCCCCCTTCTAAGGAGGTAACACTTTG\n+TTAACCCTAGTTAAGGTTTATTAAGGGAAATTTCTCAATACAATTTTATGGAAGTTAAGATAGAGAAACA\n+TAAGTCAAGGTTTACCATTACACAATTAATTTTAATGGCTATGGCTAAAGCTCCTGGGAGCTGCTGTAGT\n+TTGGAATATTTACATGAGAAAACTGGTGTTGATAAGAAGGAGCTGCTAGTATATCTAACAAGGTTAGCCA\n+AAAGAGGAATAATAGAAAGGAAATGGCATAAAAGTAGGGCAGGGAAAGAGAGAATGTATTGTCTGAAGTA\n+TAAGGAGGATATACTATGAGCCCGCCAAATGATAAATTTTTAGAATACGCTAAAGCGCTTTTAGAAACTG\n+CAGAATCTGATTTAGATGCAAGTAAATTTCTTTGGGATTGTAATTATCAAGCTCAGTCAGTGTTTTATCT\n+TCAGCAATCAACCGAGAAGATATTTAAAGCGTTTAGGAGCATGTATCAGTACCTCTTTGTAGAATTTCCC\n+GAATTTATTACAGAGGAACTGATAGCTTCAAGTAATAGTAGAGTTACTAACTTGATGCCCTATCTTACAC\n+TGTTAAATTTA
AGGTTAAAAGAGAGAAAACTGAATATTAAAGGCCTGGAAGATTTACTGAAAAAGGA'..b'G\n+ATAGACTGAGAAAAGCTCACGACGATGTTTACGTAATAGTCATAGACCCTATGAGTAACTATAACAGATT\n+CTTTGAAAATGAAGCAGATCTTAACATCGTTTTCGGAGATAATGATTATTTAGGATTGGATCCTGTTAGT\n+TTAGCTAACGAAGGAGTAGTAAGCTCTGGTGACATTGCTGATTTCTTAATAGAATCATATGGAATTCCAG\n+CTGAGCTTCGAGGCATTTTAGTAAGCCAGCTCGAGCAAAATAAATCGTTAAAAGAGCTCATGGACAATTT\n+AGAAGACTTAGCAAGCAAGAAATTTGCTACTGAATACAGAAAATTAGAGAACTTCCTATTGAATATGACA\n+ACGGGTTCTGATAAGTATGTCTTTCAAGGATCTCCGCCTAGTCTAAAGAATAAGAAATTCGTCATTTTGG\n+GATTGAAAACTGAGGACACTAGGAAGAAGCGTTTGGCAGCAACTATGCTAATGCTTTATGCATATTCACT\n+AATTAATAAACTCCCCAAGGATGTCGAGAAATTAATCCTCATCGACGAAGCCCACTTCCTCTTCGAGTAT\n+CAGTCAGTAGCTAAGATCATTGCGATCATTTACCGAACAGCCAGGGCTTTACGGACTTCAATGATCACTA\n+TGACACAACTGATACAACATTATAATATGAACAACTATAGTAGAGAAGCCTGGCAATTGGCTAACAACAA\n+ACTAATATTAAAACAAGAGAAGGAGGCAAAAGACGACCTAGTTAATTTAGCTCATCTTTCTGAAGAAGAG\n+ATTGACTATATCCTGAAATCTTCACGTGGAAGGGGAATTCTGAGGACTGGCGCAATTACTACACACATTC\n+AAATCCAACTCACTGACGAAGAAAAAGAACATTGGAGGACTGAGTGAAATGAGCTTAGTTCAGCTCATTG\n+AAAAAGCTGCGAAGAAATATAATATCAAAATAAATTCTCTCCCTAATGGTGTGATAATTCTTGTAAAAAA\n+TGACATAGGCTATGTGCAAATAGCTGCAGTTAGAAATGTTTACTATGTCAGATACTTAACGAAAAATGAA\n+GCGTATATTATACGTAACTTAAACGAAAAGATCATAGAGCTTATACTGGAAGAAAAGTTAGAAGAAACTG\n+AAGCTATCAAAATCCCTGATGTTTAATAAAAGTAAAGTTCTTTCAATTTCTTTCATTCTCTCTTCTTCAA\n+ATAATTTTTCTTTCAATTTCTGGTCAGATAAATACTTTAGTAATATCTTCTTTCTCGTCTCAGCTTTTCC\n+ATACATAATCCCTTTCTTTTTCAACAACTCATATATTCTCAATGCCTCTTCCTTGTCACCACTATATTCA\n+TATATATAATGTATAATAGGATCCTCCCTAACCACGTGCCTCAGAAGTTTCTTCTTCTGCCTAATCGCGT\n+GTCTGCCTAGTCCTTTCATTTTTAAGAATTTGATATAGTCTCTGGTTACCACTTTTCCTTCTCTTTCTAG\n+AATATATTCGTAAATCGCTAAGTAAAAAGACGCGTCCAATTTTTTCTCATCAATCTTTTGCAAAATTTTA\n+TAGAACATCTCTTTCTCTTTTTCTGTCATAACGCTCTGAAGATAATTCAGTACTATTTTCTTCATATCTT\n+TGATTTCTTTCTCAGTTTCTCTCTTTTGAGAACTTGCTCTTTCGTAATACTCTAGATTTGAATAATCTTG\n+ATATGTATAATTATCAAAGTCTACTACTTCTCCCGTACACGAGTCCACCAGAAACCCTTCGCTGTTCCTT\n+ACTTTGTAGCACTCTTGCATCAGAAAATAGAAGTATTTTAAAAGCCTGTAAGGATTTTGATGGGGAAATC\n+AGTATTACCAGTAAACTCCCCAGTTTACTAAGAAAAA
AGACTTAACTTACCCAAACTGTCCCGTTCCAGA\n+TCGCTATTATTTCTGTTTTGACTGCTGCAGTGTATTGAAATGTAGCATAGAATATGTTAGGTATGTAATT\n+CGCTGACGTGAAGCATAGAGTTTCATGTAAATATGGAATATGTTGTCCAGTAGTATTAAGATAGAAGTAA\n+GTCGTCTCATTGTTAAGTGGATTAAAATAGTAGTATATGTTGCCAAACGGGACTTTATTGAACCCACTGT\n+AATATAATGCATTGTAAATATCGACAAGATATATTTCATAATTTCCAGTGATATTCATAAATATTGTATT\n+ATTGTAAATCTGTGGGATTCTGCTTATGTTAAAGAACATTATCTGATAGTTGAAATATTGCAATTTAGGC\n+CATGCAGACAAATTTAGGATATTAGCAGTTACTAAGCTCCAGTTCTCGTAGAAAGATTCAAGCTCTAGAA\n+GATTAGTAAAAACAGAAGATGTAGTATTGAAAGTATACATAACAAATTGTGATGCGAGAACGAAATACTC\n+AAACTTCCAATATTGAAAGTCTGTTTTATTAATTGTTTTCATGATATAATTATACAATGTGTGTGCTAAC\n+GTAGCGTTTTGCTCTAAATAGAACCACGAAATCGCTGCAGACTTGCTATAAATCATTTTTTGAGAGATCC\n+AGTAAGGAGGCTGTACTGTATTATCCTGAACGTTGAAAGTAAGATAGTAGTTTATTGTGTTCCAGTCTTG\n+TTGTGTGTTTAATAGTGGAATAATTTCTATATTACCAGCGTGCCAAATTCTTTCAATAGTATAATTTGCT\n+ATCTGTATGTGATCGTAAGTAACTATCGTAGTACTATTCGCAATCGGATCTGAGCTCCACGTAGTATACG\n+TAAAGTAAGCTGTAATATTTCTAGAATTGTAGATTTTATTATTAATAAGTGTCTGTGGATCTGAACCGTT\n+GTAGACTATTACCATTATTGGTAATGTCACGTTATACTCATACCAGTTGAAATTATAACTTGTAGTAGAT\n+AAATAAGGCCCTGTAGGGTAGTATATATAATAAATATATGTTGTATTATTAACAGTACGTTGATAATGTT\n+TATATCCAGCTCCAGCACTTATACTATAGCTTCCATAAACTTCTTGTGTACTTTCTGGAGGTACTGTAAT\n+CGTTCCATAGATGTAATTAGGCCCATATTCCCCTCCATACCAATAGAAAGTTGTCGCGAAACTCTGAGAA\n+TTTATCAGCTGATTATTTGCATACAGATAGGCTGTACCCCAAACTTTCATCGCGTAATACTCATAAACAT\n+AAGTAACGTTACCTTGATTGTGCGTATATTTATTATATAAATATGTATATTCGTTATATTGAACGTTTAC\n+GCTAGTTTTTACAGAAGTTTCATTAATTATTTTGTAAACTGGGGGGTTTACTGAAATTAGATCTGACGGA\n+GGATAGCTCCAGCCAGACTGAAATCCATTAGGCGCCTCGAGTGTTTTCCCATATTTTACTATTATATCCC\n+ATTTTGAATAGTTTTCTGTATCATTTTCATTTAATGGGAACGGCTGGAGTGAAGTATTCTTATAATTGGT\n+AATATTGACATAGGCACTGCTAGCTATAGATAATAGAGCACCGTATGGCCCTGGAGCAATTTCACTTGCG\n+TTTCTAGCCAATGCATCAAATAAGAGATTTAACATATTGTTGTCAGGCTTTGAGAAGTTGAATAAGATAT\n+ATGCTTCTGGAGAAATATTGTAATAAGGTGCTGGTAGATTACTGTAGCTTCCCCTAACATATTGTAGATC\n+TTGTAACGAAGGGCTTAATGTTACTGTTTTATTATAAATGTACTTAGGCCCTGGGACGCTGTAAGACCCA\n+GAACTTCCTCCTCCACTTCCTATTTGTATTGAAGAATTCTGTTGAGGTACTTTTAGATTTGCAAAA
CTCG\n+CTATAAATGCCAGAATTATTATTATTGCTATAAATAAAACAGAGAATCTTCTAATATCCATTCCGAATAT\n+ATTATCTCCCATTTTCACTTCCCTACCTTTAGGGG\n'
b
diff -r 000000000000 -r debe9c750cdd test-data/TEST_1/test_1.fna.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/test_1.fna.gff Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,7 @@
+##gff-version 3
+gi|228288719|ref|NC_012624.1| ISEScan insertion_sequence 30355 31343 . + . ID=gi|228288719|ref|NC_012624.1|_IS_1;family=IS5;cluster=IS5_222
+gi|228288719|ref|NC_012624.1| ISEScan terminal_inverted_repeat 30355 30382 . + . ID=gi|228288719|ref|NC_012624.1|_IS_1_TIR;parent=gi|228288719|ref|NC_012624.1|_IS_1
+gi|228288719|ref|NC_012624.1| ISEScan terminal_inverted_repeat 31316 31343 . + . ID=gi|228288719|ref|NC_012624.1|_IS_1_TIR;parent=gi|228288719|ref|NC_012624.1|_IS_1
+gi|228288719|ref|NC_012624.1| ISEScan insertion_sequence 32413 33650 . - . ID=gi|228288719|ref|NC_012624.1|_IS_2;family=ISH3;cluster=ISH3_198|ISH3||gi|15898236|ref|NP_342841
+gi|228288719|ref|NC_012624.1| ISEScan terminal_inverted_repeat 32413 32456 . - . ID=gi|228288719|ref|NC_012624.1|_IS_2_TIR;parent=gi|228288719|ref|NC_012624.1|_IS_2
+gi|228288719|ref|NC_012624.1| ISEScan terminal_inverted_repeat 33607 33650 . - . ID=gi|228288719|ref|NC_012624.1|_IS_2_TIR;parent=gi|228288719|ref|NC_012624.1|_IS_2
b
diff -r 000000000000 -r debe9c750cdd test-data/TEST_1/test_1.fna.is.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/test_1.fna.is.fna Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,40 @@
+>gi|228288719|ref|NC_012624.1|_30355_31343_+ IS5_222
+AGAATTATACAAAAATTTATAAATAATTATACCGAGTATTACTCATGGGAAAGAGTAAGT
+ACAAGAGGGATTGGAGCAAGTACGACGAGAACGTTATAACTAGATACGAGCTAATGTTCC
+CCTTCTACGTCTTCCAACACTGGTGGGAATTACTAGCAGAAGAGAATAGGAATGCCAAGA
+AAACCTACAAGGCGCCAAAGGAGTTCAACGACTTCCTAGCGTTCCTGCACTTGTTCCTAC
+CTTATAGGGCCATAGAAGGAGTATTGAGAGCATTAGAAAGACTGAAAATCATCCCAACAA
+GCCTAGACTACTCAACAATATGGGAAAGAGTAAGAAACATGAACATAAAATTCCCAGAGG
+CAAATGACCAACTTGAAGTAATAGCAGACGCAACGGGAATAAGCACAAACAAGGGAGGAC
+AATACATTATAGCAAAATGGGGAAAAACCAAGGACTCAAAATTCCTCAAGATCGAAATAG
+TAATGGATAAGGACCAATTCAACGTAATAAACGCTGAAGTAACCAGCAACGAGGTTCAGA
+CTGCAGTTAAGACGGTTAAGGATTTACAAGATAAGGGAAAGAAGGTCAAGAAGTTTTATG
+GAGATAAAGCTTATGATGCTAATGAGGTTTACAAGACTGGGGTTGAGGTTGTTGTTCCAC
+CTAGGGAGAACGCCTCCACTAGACGCGGTCATCCTGCTAGGAGAAAGGCTGTAAGGGAGT
+TCAAGAGGTTGGGTTATAATCGTTGGAGGGAGGAGAGGGGTTACGGTGTTAGGTGGAGGA
+TTGAGTCCTTATTCTCTGCTGTGAAGCGTACTTTTGGGGAATCTGTTAGGGCTACAAGTT
+TTTTAGGACAAGTGGTTGAGGCTAAGCTCAAGTTCTGGGCTTATGCATGGATGGTCCACT
+TGGCTAATTCTTTAGTTGGTAGAGCTCCGGGTATTAGGGTGTGAGCTTGCGAATAACGTT
+GAAATAAATATTAATTACTGAAAAATTCT
+>gi|228288719|ref|NC_012624.1|_32413_33650_- ISH3_198|ISH3||gi|15898236|ref|NP_342841
+CCCTAATACGGTAGACCCAAAATCGTATTATGAAAAATAATTCTGACAGAATAATATCGA
+TTCATGATAGATAACAGGATTTATATGAGTATTAAGAATTCAGTTGAAGAAAGGTTTAAG
+CTGAGGGAGATGTGGTATTACCACGTGAGTAAGTGAGATGGTAACACCGGTACTCCCTCA
+CCAAAATAACATTCAACAAATAGGATATAAATTACTTTCCATGCTGAACTTCAAGGGAAA
+GAAAGGGGAAGAGGTGGCGAGAACCCTCATCTCAGCGTGTTTATGGAACGATTCGGTGGA
+AAGCAAGTCGAGGGCGTATGGCGTGTCCCCACAGACCGTGAGGAATTACGTGGAGGAGCA
+AGGGATGGAAGTTATTGAGAAACTCTTGGAAAGCGCCAGGAAGATATCCTTGAAGGTACT
+GAAGGGAGTCAAGGAGATAGACGTCTCAATAGACTGGACAACCAAGACCTGGTACGGGAG
+ACCGGTGGGAGGGCTCGGGAGTTCGGAGGAGGGAAACTCTTGGAACTACGCAACTGCGAC
+GACAAAGTTTAATGGGAAAGTGCTCCTACTGGCCTTCGTCACTCAAGTCAAGGGGATGAC
+TAAGGAAGAGATCGTGAAGGCCCTCGTGGAGCAAGTCGTCGCGATGGGGTTCAAGATAAG
+GTTGATAACTCTTGACGCAGGTTTCTATACTGTTGATGTGCTCAACTTCATTTCACAGTT
+TAAGTATATAGTTGCTGTGCCTGTTGGGGATGTTAAGGTTTATGAGGAGTTTGACGGGGA
+TTACGCAACTAATAGTAAGAGGCATAGGAGGGATGAGCAGGTCAAGTTCAGGCTTCTCGT
+GTACAGCAAGGAAAAAGTGAGGAGAAAGAAGAAGAGTGTTGTTTATTTTGCTAGGGCTAC
+TAATCTAGACCTACCGAAGGGGGAGGTGTTGGATTTGTACAATAAGGTAAGGGGTCCCAT
+AGAGACCTCTTATAGGAACATTAAGGCTTTTCTTCCATTTACTAGTTCTACTAAGTTTGT
+TTTCCGCACGTTGATCTTCGTGCTGGCCGTGGCCTTCTACTCCCTGTACACCGTGTTCAA
+GGGGGAGGTGAGGAGGGAGCAGTTCAGGATACTCCTAATACTCTTGTTTTCTGATGATTT
+ATTCTATCTAAAAGATTTTCTATTTAAATCAATAGAACCGCTTATTAATAATATAGATTT
+ATTTTCAAGGAGGTGATTTTGGGTCTACCGAATAAGGG
b
diff -r 000000000000 -r debe9c750cdd test-data/TEST_1/test_1.fna.orf.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/test_1.fna.orf.faa Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,13 @@
+>gi|228288719|ref|NC_012624.1|_30399_31298_+
+MGKSKYKRDWSKYDENVITRYELMFPFYVFQHWWELLAEENRNAKKTYKAPKEFNDFLAF
+LHLFLPYRAIEGVLRALERLKIIPTSLDYSTIWERVRNMNIKFPEANDQLEVIADATGIS
+TNKGGQYIIAKWGKTKDSKFLKIEIVMDKDQFNVINAEVTSNEVQTAVKTVKDLQDKGKK
+VKKFYGDKAYDANEVYKTGVEVVVPPRENASTRRGHPARRKAVREFKRLGYNRWREERGY
+GVRWRIESLFSAVKRTFGESVRATSFLGQVVEAKLKFWAYAWMVHLANSLVGRAPGIRV
+>gi|228288719|ref|NC_012624.1|_32517_33587_-
+MIDNRIYMSIKNSVEERFKLREMWYYHVRSEMVTPVLPHQNNIQQIGYKLLSMLNFKGKK
+GEEVARTLISACLWNDSVESKSRAYGVSPQTVRNYVEEQGMEVIEKLLESARKISLKVLK
+GVKEIDVSIDWTTKTWYGRPVGGLGSSEEGNSWNYATATTKFNGKVLLLAFVTQVKGMTK
+EEIVKALVEQVVAMGFKIRLITLDAGFYTVDVLNFISQFKYIVAVPVGDVKVYEEFDGDY
+ATNSKRHRRDXAGQVQASRVQQGKSEEKEEDVVYFARATNLDLPKGEVLDLYNKVRGPIE
+TSYRNIKAFLPFTSSTKFVXPHVDLRAGRGLLLPVHRVQGGGEEGAVQDTPNTLVF
b
diff -r 000000000000 -r debe9c750cdd test-data/TEST_1/test_1.fna.orf.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/test_1.fna.orf.fna Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,35 @@
+>gi|228288719|ref|NC_012624.1|_30399_31298_+
+ATGGGAAAGAGTAAGTACAAGAGGGATTGGAGCAAGTACGACGAGAACGTTATAACTAGA
+TACGAGCTAATGTTCCCCTTCTACGTCTTCCAACACTGGTGGGAATTACTAGCAGAAGAG
+AATAGGAATGCCAAGAAAACCTACAAGGCGCCAAAGGAGTTCAACGACTTCCTAGCGTTC
+CTGCACTTGTTCCTACCTTATAGGGCCATAGAAGGAGTATTGAGAGCATTAGAAAGACTG
+AAAATCATCCCAACAAGCCTAGACTACTCAACAATATGGGAAAGAGTAAGAAACATGAAC
+ATAAAATTCCCAGAGGCAAATGACCAACTTGAAGTAATAGCAGACGCAACGGGAATAAGC
+ACAAACAAGGGAGGACAATACATTATAGCAAAATGGGGAAAAACCAAGGACTCAAAATTC
+CTCAAGATCGAAATAGTAATGGATAAGGACCAATTCAACGTAATAAACGCTGAAGTAACC
+AGCAACGAGGTTCAGACTGCAGTTAAGACGGTTAAGGATTTACAAGATAAGGGAAAGAAG
+GTCAAGAAGTTTTATGGAGATAAAGCTTATGATGCTAATGAGGTTTACAAGACTGGGGTT
+GAGGTTGTTGTTCCACCTAGGGAGAACGCCTCCACTAGACGCGGTCATCCTGCTAGGAGA
+AAGGCTGTAAGGGAGTTCAAGAGGTTGGGTTATAATCGTTGGAGGGAGGAGAGGGGTTAC
+GGTGTTAGGTGGAGGATTGAGTCCTTATTCTCTGCTGTGAAGCGTACTTTTGGGGAATCT
+GTTAGGGCTACAAGTTTTTTAGGACAAGTGGTTGAGGCTAAGCTCAAGTTCTGGGCTTAT
+GCATGGATGGTCCACTTGGCTAATTCTTTAGTTGGTAGAGCTCCGGGTATTAGGGTGTGA
+>gi|228288719|ref|NC_012624.1|_32517_33587_-
+ATGATAGATAACAGGATTTATATGAGTATTAAGAATTCAGTTGAAGAAAGGTTTAAGCTG
+AGGGAGATGTGGTATTACCACGTGAGTAAGTGAGATGGTAACACCGGTACTCCCTCACCA
+AAATAACATTCAACAAATAGGATATAAATTACTTTCCATGCTGAACTTCAAGGGAAAGAA
+AGGGGAAGAGGTGGCGAGAACCCTCATCTCAGCGTGTTTATGGAACGATTCGGTGGAAAG
+CAAGTCGAGGGCGTATGGCGTGTCCCCACAGACCGTGAGGAATTACGTGGAGGAGCAAGG
+GATGGAAGTTATTGAGAAACTCTTGGAAAGCGCCAGGAAGATATCCTTGAAGGTACTGAA
+GGGAGTCAAGGAGATAGACGTCTCAATAGACTGGACAACCAAGACCTGGTACGGGAGACC
+GGTGGGAGGGCTCGGGAGTTCGGAGGAGGGAAACTCTTGGAACTACGCAACTGCGACGAC
+AAAGTTTAATGGGAAAGTGCTCCTACTGGCCTTCGTCACTCAAGTCAAGGGGATGACTAA
+GGAAGAGATCGTGAAGGCCCTCGTGGAGCAAGTCGTCGCGATGGGGTTCAAGATAAGGTT
+GATAACTCTTGACGCAGGTTTCTATACTGTTGATGTGCTCAACTTCATTTCACAGTTTAA
+GTATATAGTTGCTGTGCCTGTTGGGGATGTTAAGGTTTATGAGGAGTTTGACGGGGATTA
+CGCAACTAATAGTAAGAGGCATAGGAGGGATGAGCAGGTCAAGTTCAGGCTTCTCGTGTA
+CAGCAAGGAAAAAGTGAGGAGAAAGAAGAAGAGTGTTGTTTATTTTGCTAGGGCTACTAA
+TCTAGACCTACCGAAGGGGGAGGTGTTGGATTTGTACAATAAGGTAAGGGGTCCCATAGA
+GACCTCTTATAGGAACATTAAGGCTTTTCTTCCATTTACTAGTTCTACTAAGTTTGTTTT
+CCGCACGTTGATCTTCGTGCTGGCCGTGGCCTTCTACTCCCTGTACACCGTGTTCAAGGG
+GGAGGTGAGGAGGGAGCAGTTCAGGATACTCCTAATACTCTTGTTTTCTGA
b
diff -r 000000000000 -r debe9c750cdd test-data/TEST_1/test_1.fna.sum
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/test_1.fna.sum Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,4 @@
+# seqid                                                      family         nIS %Genome          bps4IS          dnaLen
+gi|228288719|ref|NC_012624.1|                                IS5              1    2.34             989           42245
+gi|228288719|ref|NC_012624.1|                                ISH3             1    2.93            1238           42245
+input                                                        total            2    5.27            2227           42245
b
diff -r 000000000000 -r debe9c750cdd test-data/TEST_1/test_1.fna.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/test_1.fna.tsv Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,3 @@
+seqID family cluster isBegin isEnd isLen ncopy4is start1 end1 start2 end2 score irId irLen nGaps orfBegin orfEnd strand orfLen E-value E-value4copy type ov tir
+gi|228288719|ref|NC_012624.1| IS5 IS5_222 30355 31343 989 1 30355 30382 31316 31343 24 20 28 0 30399 31298 + 900 1.7e-37 1.7e-37 p 1 AGAATTATACAAAAATTTATAAATAATT:AGAATTTTTCAGTAATTAATATTTATTT
+gi|228288719|ref|NC_012624.1| ISH3 ISH3_198|ISH3||gi|15898236|ref|NP_342841 32413 33650 1238 1 32413 32456 33607 33650 48 34 44 0 32517 33587 - 1071 2.3e-147 2.3e-147 c 1 CCCTTATTCGGTAGACCCAAAATCACCTCCTTGAAAATAAATCT:CCCTAATACGGTAGACCCAAAATCGTATTATGAAAAATAATTCT
b
diff -r 000000000000 -r debe9c750cdd test-data/TEST_1/test_1.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/test_1.log Thu Sep 01 09:29:56 2022 +0000
[
@@ -0,0 +1,43 @@
+/usr/local/bin/FragGeneScan -s input -o results/proteome/input -w 0 -t illumina_5 -p 1
+Using 1 threads.
+no. of seqs: 1
+Clock time used (by 1 threads) = 0.00 mins
+prepare gff file..
+Time elapsed: 0 hours 0 minutes and 0 seconds.
+ISEScan starts at Wed Aug 31 15:18:46 2022
+predict and translate genes from genome sequence into protein database using FragGeneScan program
+
+Begin to translate genome into proteome.
+Translating genome into proteome for input , return  0
+
+Finish translating genome into proteome. Wed Aug 31 15:18:46 2022
+
+Begin to phmmer search against proteome database. Wed Aug 31 15:18:46 2022
+Finish phmmer searching /usr/local/bin/pHMMs/clusters.single.faa  against results/proteome/input.faa , output results/hmm/clusters.single.faa.input.faa
+
+Finish phmmer searching against proteome database. Wed Aug 31 15:19:05 2022
+
+Begin to profile HMM search against proteome database. Wed Aug 31 15:19:05 2022
+Finish Profile HMM searching /usr/local/bin/pHMMs/clusters.faa.hmm  against results/proteome/input.faa , output results/hmm/clusters.faa.hmm.input.faa
+
+Finish profile HMM searching against proteome database. Wed Aug 31 15:19:17 2022
+pred begins at Wed Aug 31 15:19:17 2022
+Begin addNonORFcopy at Wed Aug 31 15:19:18 2022
+Finish addNonORFcopy at Wed Aug 31 15:19:18 2022
+Begin addNonORFcopy1 at Wed Aug 31 15:19:18 2022
+Finish addNonORFcopy1 at Wed Aug 31 15:19:18 2022
+getFullIS() begins at Wed Aug 31 15:19:18 2022
+Start typing IS elements
+The single-copy partial IS element with evalue > 1e-50: isLen=989 IS5 bd=[30355, 31343] orf('gi|228288719|ref|NC_012624.1|', 30399, 31298, '+') evalue=1.7e-37
+Finish typing partial IS elements
+Begin removeOverlappedHits at Wed Aug 31 15:19:18 2022
+gi|228288719|ref|NC_012624.1|: no intersected hits found
+Finish removeOverlappedHits at Wed Aug 31 15:19:18 2022
+Begin reading protein database at Wed Aug 31 15:19:18 2022
+Finish reading protein database at Wed Aug 31 15:19:18 2022
+Write IS elements from all sequences in input into one result file under results
+Write 2 IS copies to results/input.csv and results/input.tsv and results/input.raw
+Write summarized result to results/input.sum
+End in pred Wed Aug 31 15:19:18 2022
+Both complete and partial IS elements are reported.
+ISEScan ends at Wed Aug 31 15:19:18 2022
b
diff -r 000000000000 -r debe9c750cdd test-data/test_2.fna.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2.fna.gff Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,4 @@
+##gff-version 3
+gi|228288719|ref|NC_012624.1| ISEScan insertion_sequence 32413 33650 . - . ID=gi|228288719|ref|NC_012624.1|_IS_1;family=ISH3;cluster=ISH3_198|ISH3||gi|15898236|ref|NP_342841
+gi|228288719|ref|NC_012624.1| ISEScan terminal_inverted_repeat 32413 32456 . - . ID=gi|228288719|ref|NC_012624.1|_IS_1_TIR;parent=gi|228288719|ref|NC_012624.1|_IS_1
+gi|228288719|ref|NC_012624.1| ISEScan terminal_inverted_repeat 33607 33650 . - . ID=gi|228288719|ref|NC_012624.1|_IS_1_TIR;parent=gi|228288719|ref|NC_012624.1|_IS_1
b
diff -r 000000000000 -r debe9c750cdd test-data/test_2.fna.is.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2.fna.is.fna Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,22 @@
+>gi|228288719|ref|NC_012624.1|_32413_33650_- ISH3_198|ISH3||gi|15898236|ref|NP_342841
+CCCTAATACGGTAGACCCAAAATCGTATTATGAAAAATAATTCTGACAGAATAATATCGA
+TTCATGATAGATAACAGGATTTATATGAGTATTAAGAATTCAGTTGAAGAAAGGTTTAAG
+CTGAGGGAGATGTGGTATTACCACGTGAGTAAGTGAGATGGTAACACCGGTACTCCCTCA
+CCAAAATAACATTCAACAAATAGGATATAAATTACTTTCCATGCTGAACTTCAAGGGAAA
+GAAAGGGGAAGAGGTGGCGAGAACCCTCATCTCAGCGTGTTTATGGAACGATTCGGTGGA
+AAGCAAGTCGAGGGCGTATGGCGTGTCCCCACAGACCGTGAGGAATTACGTGGAGGAGCA
+AGGGATGGAAGTTATTGAGAAACTCTTGGAAAGCGCCAGGAAGATATCCTTGAAGGTACT
+GAAGGGAGTCAAGGAGATAGACGTCTCAATAGACTGGACAACCAAGACCTGGTACGGGAG
+ACCGGTGGGAGGGCTCGGGAGTTCGGAGGAGGGAAACTCTTGGAACTACGCAACTGCGAC
+GACAAAGTTTAATGGGAAAGTGCTCCTACTGGCCTTCGTCACTCAAGTCAAGGGGATGAC
+TAAGGAAGAGATCGTGAAGGCCCTCGTGGAGCAAGTCGTCGCGATGGGGTTCAAGATAAG
+GTTGATAACTCTTGACGCAGGTTTCTATACTGTTGATGTGCTCAACTTCATTTCACAGTT
+TAAGTATATAGTTGCTGTGCCTGTTGGGGATGTTAAGGTTTATGAGGAGTTTGACGGGGA
+TTACGCAACTAATAGTAAGAGGCATAGGAGGGATGAGCAGGTCAAGTTCAGGCTTCTCGT
+GTACAGCAAGGAAAAAGTGAGGAGAAAGAAGAAGAGTGTTGTTTATTTTGCTAGGGCTAC
+TAATCTAGACCTACCGAAGGGGGAGGTGTTGGATTTGTACAATAAGGTAAGGGGTCCCAT
+AGAGACCTCTTATAGGAACATTAAGGCTTTTCTTCCATTTACTAGTTCTACTAAGTTTGT
+TTTCCGCACGTTGATCTTCGTGCTGGCCGTGGCCTTCTACTCCCTGTACACCGTGTTCAA
+GGGGGAGGTGAGGAGGGAGCAGTTCAGGATACTCCTAATACTCTTGTTTTCTGATGATTT
+ATTCTATCTAAAAGATTTTCTATTTAAATCAATAGAACCGCTTATTAATAATATAGATTT
+ATTTTCAAGGAGGTGATTTTGGGTCTACCGAATAAGGG
b
diff -r 000000000000 -r debe9c750cdd test-data/test_2.fna.orf.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2.fna.orf.faa Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,7 @@
+>gi|228288719|ref|NC_012624.1|_32517_33587_-
+MIDNRIYMSIKNSVEERFKLREMWYYHVRSEMVTPVLPHQNNIQQIGYKLLSMLNFKGKK
+GEEVARTLISACLWNDSVESKSRAYGVSPQTVRNYVEEQGMEVIEKLLESARKISLKVLK
+GVKEIDVSIDWTTKTWYGRPVGGLGSSEEGNSWNYATATTKFNGKVLLLAFVTQVKGMTK
+EEIVKALVEQVVAMGFKIRLITLDAGFYTVDVLNFISQFKYIVAVPVGDVKVYEEFDGDY
+ATNSKRHRRDXAGQVQASRVQQGKSEEKEEDVVYFARATNLDLPKGEVLDLYNKVRGPIE
+TSYRNIKAFLPFTSSTKFVXPHVDLRAGRGLLLPVHRVQGGGEEGAVQDTPNTLVF
b
diff -r 000000000000 -r debe9c750cdd test-data/test_2.fna.orf.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2.fna.orf.fna Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,19 @@
+>gi|228288719|ref|NC_012624.1|_32517_33587_-
+ATGATAGATAACAGGATTTATATGAGTATTAAGAATTCAGTTGAAGAAAGGTTTAAGCTG
+AGGGAGATGTGGTATTACCACGTGAGTAAGTGAGATGGTAACACCGGTACTCCCTCACCA
+AAATAACATTCAACAAATAGGATATAAATTACTTTCCATGCTGAACTTCAAGGGAAAGAA
+AGGGGAAGAGGTGGCGAGAACCCTCATCTCAGCGTGTTTATGGAACGATTCGGTGGAAAG
+CAAGTCGAGGGCGTATGGCGTGTCCCCACAGACCGTGAGGAATTACGTGGAGGAGCAAGG
+GATGGAAGTTATTGAGAAACTCTTGGAAAGCGCCAGGAAGATATCCTTGAAGGTACTGAA
+GGGAGTCAAGGAGATAGACGTCTCAATAGACTGGACAACCAAGACCTGGTACGGGAGACC
+GGTGGGAGGGCTCGGGAGTTCGGAGGAGGGAAACTCTTGGAACTACGCAACTGCGACGAC
+AAAGTTTAATGGGAAAGTGCTCCTACTGGCCTTCGTCACTCAAGTCAAGGGGATGACTAA
+GGAAGAGATCGTGAAGGCCCTCGTGGAGCAAGTCGTCGCGATGGGGTTCAAGATAAGGTT
+GATAACTCTTGACGCAGGTTTCTATACTGTTGATGTGCTCAACTTCATTTCACAGTTTAA
+GTATATAGTTGCTGTGCCTGTTGGGGATGTTAAGGTTTATGAGGAGTTTGACGGGGATTA
+CGCAACTAATAGTAAGAGGCATAGGAGGGATGAGCAGGTCAAGTTCAGGCTTCTCGTGTA
+CAGCAAGGAAAAAGTGAGGAGAAAGAAGAAGAGTGTTGTTTATTTTGCTAGGGCTACTAA
+TCTAGACCTACCGAAGGGGGAGGTGTTGGATTTGTACAATAAGGTAAGGGGTCCCATAGA
+GACCTCTTATAGGAACATTAAGGCTTTTCTTCCATTTACTAGTTCTACTAAGTTTGTTTT
+CCGCACGTTGATCTTCGTGCTGGCCGTGGCCTTCTACTCCCTGTACACCGTGTTCAAGGG
+GGAGGTGAGGAGGGAGCAGTTCAGGATACTCCTAATACTCTTGTTTTCTGA
b
diff -r 000000000000 -r debe9c750cdd test-data/test_2.fna.sum
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2.fna.sum Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,3 @@
+# seqid                                                      family         nIS %Genome          bps4IS          dnaLen
+gi|228288719|ref|NC_012624.1|                                ISH3             1    2.93            1238           42245
+input                                                        total            1    2.93            1238           42245
b
diff -r 000000000000 -r debe9c750cdd test-data/test_2.fna.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2.fna.tsv Thu Sep 01 09:29:56 2022 +0000
b
@@ -0,0 +1,2 @@
+seqID family cluster isBegin isEnd isLen ncopy4is start1 end1 start2 end2 score irId irLen nGaps orfBegin orfEnd strand orfLen E-value E-value4copy type ov tir
+gi|228288719|ref|NC_012624.1| ISH3 ISH3_198|ISH3||gi|15898236|ref|NP_342841 32413 33650 1238 1 32413 32456 33607 33650 48 34 44 0 32517 33587 - 1071 2.3e-147 2.3e-147 c 1 CCCTTATTCGGTAGACCCAAAATCACCTCCTTGAAAATAAATCT:CCCTAATACGGTAGACCCAAAATCGTATTATGAAAAATAATTCT
b
diff -r 000000000000 -r debe9c750cdd test-data/test_2.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2.log Thu Sep 01 09:29:56 2022 +0000
[
@@ -0,0 +1,43 @@
+/usr/local/bin/FragGeneScan -s input -o results/proteome/input -w 0 -t illumina_5 -p 1
+Using 1 threads.
+no. of seqs: 1
+Clock time used (by 1 threads) = 0.00 mins
+prepare gff file..
+Time elapsed: 0 hours 0 minutes and 0 seconds.
+ISEScan starts at Wed Aug 31 15:19:29 2022
+predict and translate genes from genome sequence into protein database using FragGeneScan program
+
+Begin to translate genome into proteome.
+Translating genome into proteome for input , return  0
+
+Finish translating genome into proteome. Wed Aug 31 15:19:29 2022
+
+Begin to phmmer search against proteome database. Wed Aug 31 15:19:29 2022
+Finish phmmer searching /usr/local/bin/pHMMs/clusters.single.faa  against results/proteome/input.faa , output results/hmm/clusters.single.faa.input.faa
+
+Finish phmmer searching against proteome database. Wed Aug 31 15:19:48 2022
+
+Begin to profile HMM search against proteome database. Wed Aug 31 15:19:48 2022
+Finish Profile HMM searching /usr/local/bin/pHMMs/clusters.faa.hmm  against results/proteome/input.faa , output results/hmm/clusters.faa.hmm.input.faa
+
+Finish profile HMM searching against proteome database. Wed Aug 31 15:20:01 2022
+pred begins at Wed Aug 31 15:20:01 2022
+Begin addNonORFcopy at Wed Aug 31 15:20:01 2022
+Finish addNonORFcopy at Wed Aug 31 15:20:01 2022
+Begin addNonORFcopy1 at Wed Aug 31 15:20:01 2022
+Finish addNonORFcopy1 at Wed Aug 31 15:20:01 2022
+getFullIS() begins at Wed Aug 31 15:20:01 2022
+Start removing partial IS elements
+Remove single-copy partial IS element with evalue > 1e-50: isLen=989 IS5 bd=[30355, 31343] orf('gi|228288719|ref|NC_012624.1|', 30399, 31298, '+') evalue=1.7e-37
+Finish removing partial IS elements
+Begin removeOverlappedHits at Wed Aug 31 15:20:01 2022
+gi|228288719|ref|NC_012624.1|: no intersected hits found
+Finish removeOverlappedHits at Wed Aug 31 15:20:01 2022
+Begin reading protein database at Wed Aug 31 15:20:01 2022
+Finish reading protein database at Wed Aug 31 15:20:01 2022
+Write IS elements from all sequences in input into one result file under results
+Write 1 IS copies to results/input.csv and results/input.tsv and results/input.raw
+Write summarized result to results/input.sum
+End in pred Wed Aug 31 15:20:01 2022
+Only complete IS elements are reported.
+ISEScan ends at Wed Aug 31 15:20:01 2022