Repository 'ctat_lncrna'
hg clone https://toolshed.g2.bx.psu.edu/repos/trinity_ctat/ctat_lncrna

Changeset 0:a940c4a36a43 (2018-07-17)
Commit message:
Upload ctat tools.
added:
ctat_lncrna.xml
test-data/slncky/reads.simPE.browse.html
test-data/slncky/reads.simPE.canonical_to_lncs.txt
test-data/slncky/reads.simPE.canonical_to_lncs.txt.sorted
test-data/slncky/reads.simPE.cluster_info.txt
test-data/slncky/reads.simPE.cluster_info.txt.sorted
test-data/slncky/reads.simPE.filtered_info.txt
test-data/slncky/reads.simPE.filtered_info.txt.sorted
test-data/slncky/reads.simPE.lncs.bed
test-data/slncky/reads.simPE.lncs.bed.sorted
test-data/slncky/reads.simPE.lncs.info.txt
test-data/slncky/reads.simPE.lncs.info.txt.sorted
test-data/slncky/reads.simPE.orfs.txt
test-data/slncky/reads.simPE.orthologs.top.txt
test-data/slncky/reads.simPE.orthologs.top.txt.sorted
test-data/slncky/reads.simPE.orthologs.txt
test-data/slncky/reads.simPE.orthologs.txt.sorted
tool-data/ctat_lncrna_annotations.loc.sample
tool_data_table_conf.xml.sample
b
diff -r 000000000000 -r a940c4a36a43 ctat_lncrna.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_lncrna.xml Tue Jul 17 11:49:16 2018 -0400
[
b'@@ -0,0 +1,181 @@\n+<tool id="ctat_lncrna" name="ctat_lncrna" version="1.0.0" profile="17.05">\n+\n+    <description>Filters a high-quality set of lncRNA from reconstructed RNA-seq data</description>\n+    <requirements>\n+    <requirement type="package" version="1.0.1">ctat-lncrna</requirement>\n+    </requirements>\n+    <command detect_errors="default">\n+      <![CDATA[\n+     #if str($bedfile) != "":\n+     lncrna --bedfile $bedfile\n+     --config "${assembly.fields.path}" \n+     #end if\n+\n+     #if str($assembly) != "":\n+         --assembly "${assembly.fields.value}"\n+     #end if\n+\n+     #if $min_overlap != "":\n+         --min_overlap "$min_overlap"\n+     #end if\n+\n+     #if str($min_cluster) != "":\n+         --min_cluster "$min_cluster"\n+     #end if\n+\n+     #if str($min_coding) != "":\n+         --min_coding "$min_coding"\n+     #end if\n+\n+    #if str($minMatch) != "":\n+        --minMatch "$minMatch"\n+    #end if\n+\n+    #if str($pad) != "":\n+        --pad "$pad"\n+    #end if\n+\n+    #if str($gap_open) != "":\n+        --gap_open "$gap_open"\n+    #end if\n+\n+    #if str($gap_extend) != "":\n+        --gap_extend "$gap_extend"\n+    #end if\n+    --web \n+    && cp -r slncky.EvolutionBrowser $html_file.files_path\n+]]>\n+    </command>\n+\n+    <inputs>\n+      <param name="bedfile" format="interval" type="data" value="bedfile" label="Bed File:" help="Reconstructed transcripts (ex. 
from StringTie or Trinity)"/>\n+      <param name="min_overlap" type="float" value="0.0" label="Min overlap" help="Remove any transcript that overlap annotated coding gene greater than min_overlap" />\n+      <param name="min_cluster" type="integer" value="2" label="Min cluster" help="Minimum size of duplication clusters to remove" />\n+      <param name="min_coding" type="float" value="0.1" label="Min exonic identity to filter out transcript that aligns to orthologous coding gene" help="default is set by learning coding alignment distribution from data"/>\n+      <param name="minMatch" type="float" value="0.1" label="Min match" help="Minimum match parameter for liftover" />\n+      <param name="pad" type="integer" value="0" label="Pad" help="No. of basepairs to search up- and down-stream when lifting over lnc to ortholog" />\n+      <param name="gap_open" type="integer" value="200" label="Gap open penalty to pass to lastz" help="Decreasing this parameter will increase sensitivity of orthology search"/>\n+      <param name="gap_extend" type="integer" value="40" label="Gap extend penalty to pass to lastz" help="Decreasing this parameter will increase sensitivity of orthology search."/>\n+      <param name="assembly" type="select" label="Choose annotation:" help="Select annotation">\n+            <options from_data_table="ctat_lncrna_annotations" />\n+      </param>\n+    </inputs>\n+\n+    <outputs>\n+           <data format="html" name="html_file" label="web_report" from_work_dir="slncky.EvolutionBrowser/browse.html"/>\n+           <data format="tabular" name="canonical_to_lncs" label="canonical_to_lncs" from_work_dir="slncky.canonical_to_lncs.txt"/>\n+           <data format="tabular" name="cluster_info" label="cluster_info" from_work_dir="slncky.cluster_info.txt"/>\n+           <data format="tabular" name="filtered_info" label="filtered_info" from_work_dir="slncky.filtered_info.txt"/>\n+           <data format="bed" name="lncs" label="lncs" 
from_work_dir="slncky.lncs.bed"/>\n+           <data format="tabular" name="lncs_info" label="lncs.info" from_work_dir="slncky.lncs.info.txt"/>\n+           <data format="tabular" name="orfs" label="orfs" from_work_dir="slncky.orfs.txt"/>\n+           <data format="tabular" name="orthologs_top" label="orthologs.top" from_work_dir="slncky.orthologs.top.txt"/>\n+           <data format="tabular" name="orthologs" label="orthologs" from_work_dir="slncky.orthologs.txt"/>\n+    </outputs>\n+\n+    <stdio>\n+          <exit_code range="1:" level="fatal" description="Error in SLNCky" />\n+    </stdio>\n+\n+    <tests>\n+        <test>\n+            <param name="bed'..b'format="bed" name="lncs" label="lncs" from_work_dir="slncky.lncs.bed"/> -->\n+            <output name="lncs" file="slncky/reads.simPE.lncs.bed" />\n+\n+            <!-- <data format="txt" name="lncs_info" label="lncs.info" from_work_dir="slncky.lncs.info.txt"/> -->\n+            <output name="lncs_info" file="slncky/reads.simPE.lncs.info.txt" />\n+\n+            <!-- <data format="txt" name="orfs" label="orfs" from_work_dir="slncky.orfs.txt"/> -->\n+            <output name="orfs" file="slncky/reads.simPE.orfs.txt" />\n+\n+            <!-- <data format="txt" name="orthologs_top" label="orthologs.top" from_work_dir="slncky.orthologs.top.txt"/> -->\n+            <output name="orthologs_top" file="slncky/reads.simPE.orthologs.top.txt" />\n+\n+            <!-- <data format="txt" name="orthologs" label="orthologs" from_work_dir="slncky.orthologs.txt"/> -->\n+            <output name="orthologs" file="slncky/reads.simPE.orthologs.txt" />\n+\n+        </test>\n+        <test>\n+            <param name="bedfile" value="mir-17-92.StringTie.transcripts.bed" />\n+            <param name="assembly" value="hg19" /> \n+            <param name="min_overlap" value="0.0" />\n+            <param name="min_cluster" value="2" />\n+            <param name="min_coding" value="0.1" />\n+            <param name="minMatch" 
value="0.1" />\n+            <param name="pad" value="0" />\n+            <param name="gap_open" value="200" />\n+            <param name="gap_extend" value="40" />\n+\n+            <!-- <data format="html" name="html_file" label="web_report" from_work_dir="slncky.EvolutionBrowser/browse.html"/> -->\n+            <output name="html_file" file="slncky/mir-17-92.browse.html" />\n+\n+            <!-- <data format="txt" name="canonical_to_lncs" label="canonical_to_lncs" from_work_dir="slncky.canonical_to_lncs.txt"/> -->\n+            <output name="canonical_to_lncs" file="slncky/mir-17-92.canonical_to_lncs.txt.sorted" sort="true" />\n+ \n+            <!-- <data format="txt" name="cluster_info" label="cluster_info" from_work_dir="slncky.cluster_info.txt"/> -->\n+            <!-- This file isn\'t being created for some reason...\n+            <output name="cluster_info" >\n+                <assert_contents>\n+                    <has_line_matching expression=".+" />\n+                </assert_contents>\n+            </output>\n+            -->\n+\n+            <!-- <data format="txt" name="filtered_info" label="filtered_info" from_work_dir="slncky.filtered_info.txt"/> -->\n+            <output name="filtered_info" file="slncky/mir-17-92.filtered_info.txt.sorted" sort="true" />\n+\n+            <!-- <data format="bed" name="lncs" label="lncs" from_work_dir="slncky.lncs.bed"/> -->\n+            <output name="lncs" file="slncky/mir-17-92.lncs.bed" />\n+\n+            <!-- <data format="txt" name="lncs_info" label="lncs.info" from_work_dir="slncky.lncs.info.txt"/> -->\n+            <output name="lncs_info" file="slncky/mir-17-92.lncs.info.txt" />\n+\n+            <!-- <data format="txt" name="orfs" label="orfs" from_work_dir="slncky.orfs.txt"/> -->\n+            <output name="orfs" file="slncky/mir-17-92.orfs.txt" />\n+\n+            <!-- <data format="txt" name="orthologs_top" label="orthologs.top" from_work_dir="slncky.orthologs.top.txt"/> -->\n+            <output 
name="orthologs_top" file="slncky/mir-17-92.orthologs.top.txt" />\n+\n+            <!-- <data format="txt" name="orthologs" label="orthologs" from_work_dir="slncky.orthologs.txt"/> -->\n+            <output name="orthologs" file="slncky/mir-17-92.orthologs.txt" />\n+        </test>\n+    </tests>\n+\n+    <help>\n+.. class:: infomark\n+\n+slncky is a tool for lncRNA discovery from RNA-Seq data. slncky filters a high-quality set of noncoding transcripts, discovers lncRNA orthologs, and characterizes conserved lncRNA evolution.\n+\n+To learn more about slncky visit their website_ .\n+\n+.. _website: https://github.com/slncky/slncky\n+\n+</help>\n+\n+<citations>\n+        <citation type="doi">10.1186/s13059-016-0880-9</citation>\n+</citations>\n+\n+</tool>\n'
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.browse.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.browse.html Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,83 @@
+<html>
+<head>
+<script type="text/javascript" charset="utf8" src="search.js"></script>
+
+<!-- DataTables CSS -->
+<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.9/css/jquery.dataTables.css">
+<!-- Bootstrap CSS -->
+<link rel="stylesheet" type="text/css" href="https://bootswatch.com/paper/bootstrap.min.css">
+<!-- jQuery -->
+<script type="text/javascript" charset="utf8" src="https://code.jquery.com/jquery-1.11.3.min.js"></script>
+
+<!-- DataTables -->
+<script type="text/javascript" charset="utf8" src="https://cdn.datatables.net/1.10.9/js/jquery.dataTables.min.js"></script>
+<style> select { font-size: 90%; width: 30px; } th { font-size: 80%; } td { font-size: 80%; } td.red {color: red;} td.green {color: #009900;} </style>
+<script type="text/javascript" class="init">
+$(document).ready(function() {
+ $('#table_id').DataTable();
+} );
+</script>
+
+
+</head>
+<body>
+
+<nav class="navbar navbar-default">
+<div class="container-fluid" style="position: absolute; width: 100%;">
+<div class="navbar-header">
+<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-2">
+<span class="sr-only">Toggle navigation</span>
+<span class="icon-bar"></span>
+<span class="icon-bar"></span>
+<span class="icon-bar"></span>
+</button>
+<a class ="navbar-brand" href="#"><p style="font-size:26"><em>slncky</em> Evolution Browser</p></a>
+</div>
+
+<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-2">
+<ul class="nav navbar-nav">
+
+<a href="#"><li class="active">Browse</a></li>
+</ul>
+<ul class="nav navbar-nav navbar-right">
+</ul>
+</div>
+</div>
+</nav>
+
+<div class="container">
+<div class="jumbotron">
+<h2>Browse lncRNAs</h2>  
+<hr>
+<br>
+<table id="table_id" class="table table-striped table-hover" cellspacing="0" width="100%">
+<thead>
+<tr>
+<th>Lnc</th>
+<th>Name</th>
+<th>Ortholog</th>
+<th>Ortholog Name</th>
+<th>Type</th>
+<th>Transcript-Transcript Identity</th>
+<th>Trascript-Genome Identity</th>
+<th>Indel Rate</th>
+<th>Conserved Splice Sites</th>
+<th>Total Splice Sites</th>
+<th>Lnc Exons Aligned</th>
+<th>Ortholog Exons Aligned</th>
+</tr>
+</thead>
+<tbody>
+<tr><td><a href="lnc/STRG.25.1.html" target="_blank">STRG.25.1</a></td><td>MALAT1</td><td>uc008gfj.2</td><td>Malat1</td><td>intergenic</td><td>0.63</td><td>0.63</td><td>-</td><td>0.0</td><td>0</td><td>1,</td><td>1,</td></tr>
+<tr><td><a href="lnc/STRG.5.1.html" target="_blank">STRG.5.1</a></td><td>Unannotated</td><td>uc009vfa.1</td><td>Atpase6</td><td>intergenic</td><td>0.00</td><td>0.00</td><td>-</td><td>0.0</td><td>0</td><td>NA</td><td>NA</td></tr>
+<tr><td><a href="lnc/STRG.52.1.html" target="_blank">STRG.52.1</a></td><td>JA760615</td><td>uc012hdm.1</td><td>BC071253</td><td>intergenic</td><td>0.00</td><td>-0.00</td><td>-</td><td>0.0</td><td>0</td><td>NA</td><td>NA</td></tr>
+<tr><td><a href="lnc/STRG.48.2.html" target="_blank">STRG.48.2</a></td><td>OK/SW-cl.16</td><td>uc009vez.1</td><td>Cox2</td><td>intergenic</td><td>0.00</td><td>-0.00</td><td>-</td><td>0.0</td><td>0</td><td>NA</td><td>NA</td></tr>
+<tr><td><a href="lnc/STRG.44.1.html" target="_blank">STRG.44.1</a></td><td>TVAS5</td><td>uc009vew.1</td><td>AK018753</td><td>intergenic</td><td>0.00</td><td>-0.00</td><td>-</td><td>0.0</td><td>0</td><td>NA</td><td>NA</td></tr>
+<tr><td><a href="lnc/STRG.49.1.html" target="_blank">STRG.49.1</a></td><td>OK/SW-cl.16</td><td>uc009vfa.1</td><td>Atpase6</td><td>intergenic</td><td>0.00</td><td>-0.00</td><td>-</td><td>0.0</td><td>0</td><td>NA</td><td>NA</td></tr>
+<tr><td><a href="lnc/STRG.45.1.html" target="_blank">STRG.45.1</a></td><td>TVAS5</td><td>uc009vew.1</td><td>AK018753</td><td>intergenic</td><td>0.00</td><td>-0.00</td><td>-</td><td>0.0</td><td>0</td><td>NA</td><td>NA</td></tr>
+<tr><td><a href="lnc/STRG.51.1.html" target="_blank">STRG.51.1</a></td><td>MTND5</td><td>uc009vfc.1</td><td>Cytb</td><td>intergenic</td><td>0.00</td><td>-0.00</td><td>-</td><td>0.0</td><td>0</td><td>NA</td><td>NA</td></tr>
+</tbody>
+</table>
+</div>
+</body>
+</html>
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.canonical_to_lncs.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.canonical_to_lncs.txt Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,27 @@
+#canonical lncs
+STRG.45.1 STRG.45.1,
+STRG.28.1 STRG.28.2,STRG.28.1,
+STRG.25.1 STRG.25.2,STRG.25.1,
+STRG.44.1 STRG.44.1,
+STRG.2.1 STRG.2.1,
+STRG.15.2 STRG.15.2,STRG.15.1,
+STRG.50.1 STRG.50.1,
+STRG.3.1 STRG.3.1,
+STRG.17.1 STRG.17.1,
+STRG.48.1 STRG.48.1,
+STRG.48.2 STRG.48.2,
+STRG.1.1 STRG.1.1,
+STRG.42.1 STRG.42.1,
+STRG.51.1 STRG.51.1,
+STRG.8.1 STRG.8.1,
+STRG.7.1 STRG.7.1,
+STRG.6.1 STRG.6.1,
+STRG.52.1 STRG.52.1,
+STRG.10.1 STRG.10.1,
+STRG.47.1 STRG.47.1,
+STRG.46.1 STRG.46.1,
+STRG.5.1 STRG.5.1,STRG.5.2,STRG.4.1,
+STRG.22.1 STRG.22.1,
+STRG.43.1 STRG.43.1,
+STRG.23.1 STRG.23.1,
+STRG.49.1 STRG.49.1,STRG.49.2,STRG.49.3,
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.canonical_to_lncs.txt.sorted
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.canonical_to_lncs.txt.sorted Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,27 @@
+#canonical lncs
+STRG.10.1 STRG.10.1,
+STRG.1.1 STRG.1.1,
+STRG.15.2 STRG.15.2,STRG.15.1,
+STRG.17.1 STRG.17.1,
+STRG.2.1 STRG.2.1,
+STRG.22.1 STRG.22.1,
+STRG.23.1 STRG.23.1,
+STRG.25.1 STRG.25.2,STRG.25.1,
+STRG.28.1 STRG.28.2,STRG.28.1,
+STRG.3.1 STRG.3.1,
+STRG.42.1 STRG.42.1,
+STRG.43.1 STRG.43.1,
+STRG.44.1 STRG.44.1,
+STRG.45.1 STRG.45.1,
+STRG.46.1 STRG.46.1,
+STRG.47.1 STRG.47.1,
+STRG.48.1 STRG.48.1,
+STRG.48.2 STRG.48.2,
+STRG.49.1 STRG.49.1,STRG.49.2,STRG.49.3,
+STRG.50.1 STRG.50.1,
+STRG.51.1 STRG.51.1,
+STRG.5.1 STRG.5.1,STRG.5.2,STRG.4.1,
+STRG.52.1 STRG.52.1,
+STRG.6.1 STRG.6.1,
+STRG.7.1 STRG.7.1,
+STRG.8.1 STRG.8.1,
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.cluster_info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.cluster_info.txt Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,6 @@
+3 N STRG.22.1,STRG.15.2,STRG.6.1,
+3 N STRG.7.1,STRG.10.1,STRG.43.1,
+2 N STRG.1.1,STRG.46.1,
+2 N STRG.47.1,STRG.2.1,
+2 N STRG.48.1,STRG.3.1,
+#clusterSize alignToDup? transcripts
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.cluster_info.txt.sorted
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.cluster_info.txt.sorted Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,6 @@
+2 N STRG.1.1,STRG.46.1,
+2 N STRG.47.1,STRG.2.1,
+2 N STRG.48.1,STRG.3.1,
+3 N STRG.22.1,STRG.15.2,STRG.6.1,
+3 N STRG.7.1,STRG.10.1,STRG.43.1,
+#clusterSize alignToDup? transcripts
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.filtered_info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.filtered_info.txt Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,49 @@
+STRG.19.1 200.0% exonic overlap with coding transcript uc003ubx.4
+STRG.16.1 100.0% exonic overlap with coding transcript NM_001003806
+STRG.14.1 100.0% exonic overlap with coding transcript uc003nxk.2
+STRG.29.1 100.0% exonic overlap with coding transcript NM_001014449
+STRG.13.1 100.0% exonic overlap with coding transcript NM_005345
+STRG.27.1 100.0% exonic overlap with coding transcript uc001oja.3
+STRG.18.1 100.0% exonic overlap with coding transcript uc003tzi.4
+STRG.26.1 100.0% exonic overlap with coding transcript uc001oiw.2
+STRG.20.1 99.8% exonic overlap with coding transcript NM_198085
+STRG.37.1 99.8% exonic overlap with coding transcript uc002klr.3
+STRG.41.1 99.6% exonic overlap with coding transcript NM_014649
+STRG.33.1 97.7% exonic overlap with coding transcript NM_198993
+STRG.40.1 88.6% exonic overlap with coding transcript NM_002967
+STRG.31.3 71.4% exonic overlap with coding transcript uc002hqy.1
+STRG.32.1 40.2% exonic overlap with coding transcript uc002hrq.1
+STRG.30.1 37.4% exonic overlap with coding transcript uc002fnn.2
+STRG.34.1 35.7% exonic overlap with coding transcript uc002hsq.3
+STRG.24.1 32.7% exonic overlap with coding transcript uc001ocx.3
+STRG.39.1 32.6% exonic overlap with coding transcript NM_004152
+STRG.36.1 20.8% exonic overlap with coding transcript uc002iyw.4
+STRG.38.1 transcript entirely within coding transcript NM_001003652
+STRG.11.1 transcript entirely within coding transcript NM_173666
+STRG.21.2 transcript entirely within coding transcript uc011leg.2
+STRG.21.1 transcript entirely within coding transcript NM_001164750
+STRG.31.1 transcript entirely within coding transcript NM_000978
+STRG.31.2 transcript entirely within coding transcript uc002hqx.1
+STRG.9.1 transcript entirely within coding transcript NM_020357
+STRG.35.1 transcript entirely within coding transcript NM_001075099
+STRG.12.1 transcript entirely within coding transcript uc011dmq.2
+STRG.2.1 aligns to STRG.47.1 with 98.6% identity and 99.9% coverage. Appears to be duplication.
+STRG.15.2 aligns to STRG.22.1 with 99.4% identity and 100.0% coverage. Appears to be duplication.
+STRG.15.2 aligns to STRG.6.1 with 97.0% identity and 100.0% coverage. Appears to be duplication.
+STRG.3.1 aligns to STRG.48.1 with 98.2% identity and 99.9% coverage. Appears to be duplication.
+STRG.48.1 aligns to STRG.3.1 with 98.2% identity and 99.9% coverage. Appears to be duplication.
+STRG.1.1 aligns to STRG.46.1 with 98.5% identity and 99.9% coverage. Appears to be duplication.
+STRG.7.1 aligns to STRG.43.1 with 90.7% identity and 100.0% coverage. Appears to be duplication.
+STRG.7.1 aligns to STRG.10.1 with 90.1% identity and 98.9% coverage. Appears to be duplication.
+STRG.6.1 aligns to STRG.22.1 with 97.1% identity and 100.0% coverage. Appears to be duplication.
+STRG.6.1 aligns to STRG.15.2 with 97.0% identity and 100.0% coverage. Appears to be duplication.
+STRG.10.1 aligns to STRG.7.1 with 90.1% identity and 98.9% coverage. Appears to be duplication.
+STRG.10.1 aligns to STRG.43.1 with 89.5% identity and 99.3% coverage. Appears to be duplication.
+STRG.47.1 aligns to STRG.2.1 with 98.6% identity and 99.9% coverage. Appears to be duplication.
+STRG.46.1 aligns to STRG.1.1 with 98.5% identity and 99.9% coverage. Appears to be duplication.
+STRG.22.1 aligns to STRG.15.2 with 99.4% identity and 100.0% coverage. Appears to be duplication.
+STRG.22.1 aligns to STRG.6.1 with 97.1% identity and 100.0% coverage. Appears to be duplication.
+STRG.43.1 aligns to STRG.10.1 with 89.5% identity and 99.3% coverage. Appears to be duplication.
+STRG.43.1 aligns to STRG.7.1 with 90.7% identity and 100.0% coverage. Appears to be duplication.
+STRG.17.1 aligns to mm9 coding transcript uc008zth.2 with 65.4% exonic identity
+STRG.17.1 aligns to mm9 coding transcript NM_133900 with 65.4% exonic identity
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.filtered_info.txt.sorted
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.filtered_info.txt.sorted Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,49 @@
+STRG.10.1 aligns to STRG.43.1 with 89.5% identity and 99.3% coverage. Appears to be duplication.
+STRG.10.1 aligns to STRG.7.1 with 90.1% identity and 98.9% coverage. Appears to be duplication.
+STRG.11.1 transcript entirely within coding transcript NM_173666
+STRG.1.1 aligns to STRG.46.1 with 98.5% identity and 99.9% coverage. Appears to be duplication.
+STRG.12.1 transcript entirely within coding transcript uc011dmq.2
+STRG.13.1 100.0% exonic overlap with coding transcript NM_005345
+STRG.14.1 100.0% exonic overlap with coding transcript uc003nxk.2
+STRG.15.2 aligns to STRG.22.1 with 99.4% identity and 100.0% coverage. Appears to be duplication.
+STRG.15.2 aligns to STRG.6.1 with 97.0% identity and 100.0% coverage. Appears to be duplication.
+STRG.16.1 100.0% exonic overlap with coding transcript NM_001003806
+STRG.17.1 aligns to mm9 coding transcript NM_133900 with 65.4% exonic identity
+STRG.17.1 aligns to mm9 coding transcript uc008zth.2 with 65.4% exonic identity
+STRG.18.1 100.0% exonic overlap with coding transcript uc003tzi.4
+STRG.19.1 200.0% exonic overlap with coding transcript uc003ubx.4
+STRG.20.1 99.8% exonic overlap with coding transcript NM_198085
+STRG.21.1 transcript entirely within coding transcript NM_001164750
+STRG.21.2 transcript entirely within coding transcript uc011leg.2
+STRG.2.1 aligns to STRG.47.1 with 98.6% identity and 99.9% coverage. Appears to be duplication.
+STRG.22.1 aligns to STRG.15.2 with 99.4% identity and 100.0% coverage. Appears to be duplication.
+STRG.22.1 aligns to STRG.6.1 with 97.1% identity and 100.0% coverage. Appears to be duplication.
+STRG.24.1 32.7% exonic overlap with coding transcript uc001ocx.3
+STRG.26.1 100.0% exonic overlap with coding transcript uc001oiw.2
+STRG.27.1 100.0% exonic overlap with coding transcript uc001oja.3
+STRG.29.1 100.0% exonic overlap with coding transcript NM_001014449
+STRG.30.1 37.4% exonic overlap with coding transcript uc002fnn.2
+STRG.31.1 transcript entirely within coding transcript NM_000978
+STRG.31.2 transcript entirely within coding transcript uc002hqx.1
+STRG.31.3 71.4% exonic overlap with coding transcript uc002hqy.1
+STRG.3.1 aligns to STRG.48.1 with 98.2% identity and 99.9% coverage. Appears to be duplication.
+STRG.32.1 40.2% exonic overlap with coding transcript uc002hrq.1
+STRG.33.1 97.7% exonic overlap with coding transcript NM_198993
+STRG.34.1 35.7% exonic overlap with coding transcript uc002hsq.3
+STRG.35.1 transcript entirely within coding transcript NM_001075099
+STRG.36.1 20.8% exonic overlap with coding transcript uc002iyw.4
+STRG.37.1 99.8% exonic overlap with coding transcript uc002klr.3
+STRG.38.1 transcript entirely within coding transcript NM_001003652
+STRG.39.1 32.6% exonic overlap with coding transcript NM_004152
+STRG.40.1 88.6% exonic overlap with coding transcript NM_002967
+STRG.41.1 99.6% exonic overlap with coding transcript NM_014649
+STRG.43.1 aligns to STRG.10.1 with 89.5% identity and 99.3% coverage. Appears to be duplication.
+STRG.43.1 aligns to STRG.7.1 with 90.7% identity and 100.0% coverage. Appears to be duplication.
+STRG.46.1 aligns to STRG.1.1 with 98.5% identity and 99.9% coverage. Appears to be duplication.
+STRG.47.1 aligns to STRG.2.1 with 98.6% identity and 99.9% coverage. Appears to be duplication.
+STRG.48.1 aligns to STRG.3.1 with 98.2% identity and 99.9% coverage. Appears to be duplication.
+STRG.6.1 aligns to STRG.15.2 with 97.0% identity and 100.0% coverage. Appears to be duplication.
+STRG.6.1 aligns to STRG.22.1 with 97.1% identity and 100.0% coverage. Appears to be duplication.
+STRG.7.1 aligns to STRG.10.1 with 90.1% identity and 98.9% coverage. Appears to be duplication.
+STRG.7.1 aligns to STRG.43.1 with 90.7% identity and 100.0% coverage. Appears to be duplication.
+STRG.9.1 transcript entirely within coding transcript NM_020357
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.lncs.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.lncs.bed Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,13 @@
+chrM 3306 4262 STRG.45.1 100.0 + 3306 4262 255,0,0 1 956 0
+chr13 82264045 82265207 STRG.28.1 100.0 + 82264045 82265207 255,0,0 1 1162 0
+chr11 65265232 65273940 STRG.25.1 100.0 + 65265232 65273940 255,0,0 1 8708 0
+chrM 1670 3229 STRG.44.1 100.0 + 1670 3229 255,0,0 1 1559 0
+chrM 10058 10404 STRG.50.1 100.0 + 10058 10404 255,0,0 1 346 0
+chrM 8294 8364 STRG.48.2 100.0 + 8294 8364 255,0,0 1 70 0
+chrX 108297360 108297792 STRG.42.1 100.0 - 108297360 108297792 255,0,0 1 432 0
+chrM 12336 14148 STRG.51.1 100.0 + 12336 14148 255,0,0 1 1812 0
+chr2 26901086 26901378 STRG.8.1 100.0 - 26901086 26901378 255,0,0 1 292 0
+chrM 14148 14673 STRG.52.1 100.0 - 14148 14673 255,0,0 1 525 0
+chr1 569075 569756 STRG.5.1 100.0 + 569075 569756 255,0,0 1 681 0
+chr10 23425852 23426590 STRG.23.1 100.0 + 23425852 23426590 255,0,0 1 738 0
+chrM 9206 9990 STRG.49.1 100.0 + 9206 9990 255,0,0 1 784 0
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.lncs.bed.sorted
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.lncs.bed.sorted Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,13 @@
+chr10 23425852 23426590 STRG.23.1 100.0 + 23425852 23426590 255,0,0 1 738 0
+chr11 65265232 65273940 STRG.25.1 100.0 + 65265232 65273940 255,0,0 1 8708 0
+chr13 82264045 82265207 STRG.28.1 100.0 + 82264045 82265207 255,0,0 1 1162 0
+chr1 569075 569756 STRG.5.1 100.0 + 569075 569756 255,0,0 1 681 0
+chr2 26901086 26901378 STRG.8.1 100.0 - 26901086 26901378 255,0,0 1 292 0
+chrM 10058 10404 STRG.50.1 100.0 + 10058 10404 255,0,0 1 346 0
+chrM 12336 14148 STRG.51.1 100.0 + 12336 14148 255,0,0 1 1812 0
+chrM 14148 14673 STRG.52.1 100.0 - 14148 14673 255,0,0 1 525 0
+chrM 1670 3229 STRG.44.1 100.0 + 1670 3229 255,0,0 1 1559 0
+chrM 3306 4262 STRG.45.1 100.0 + 3306 4262 255,0,0 1 956 0
+chrM 8294 8364 STRG.48.2 100.0 + 8294 8364 255,0,0 1 70 0
+chrM 9206 9990 STRG.49.1 100.0 + 9206 9990 255,0,0 1 784 0
+chrX 108297360 108297792 STRG.42.1 100.0 - 108297360 108297792 255,0,0 1 432 0
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.lncs.info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.lncs.info.txt Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,13 @@
+STRG.45.1 TVAS5 intergenic
+STRG.28.1 Unannotated intergenic
+STRG.25.1 MALAT1 intergenic
+STRG.44.1 TVAS5 intergenic
+STRG.50.1 AD intergenic
+STRG.48.2 OK/SW-cl.16 intergenic
+STRG.42.1 Unannotated intergenic
+STRG.51.1 MTND5 intergenic
+STRG.8.1 Unannotated intergenic
+STRG.52.1 JA760615 intergenic
+STRG.5.1 Unannotated intergenic
+STRG.23.1 Unannotated intergenic
+STRG.49.1 OK/SW-cl.16 intergenic
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.lncs.info.txt.sorted
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.lncs.info.txt.sorted Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,13 @@
+STRG.23.1 Unannotated intergenic
+STRG.25.1 MALAT1 intergenic
+STRG.28.1 Unannotated intergenic
+STRG.42.1 Unannotated intergenic
+STRG.44.1 TVAS5 intergenic
+STRG.45.1 TVAS5 intergenic
+STRG.48.2 OK/SW-cl.16 intergenic
+STRG.49.1 OK/SW-cl.16 intergenic
+STRG.50.1 AD intergenic
+STRG.51.1 MTND5 intergenic
+STRG.5.1 Unannotated intergenic
+STRG.52.1 JA760615 intergenic
+STRG.8.1 Unannotated intergenic
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.orfs.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.orfs.txt Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,3 @@
+lnc lncName ortholog orthologName alignScore lengthLncOrf lengthOrthOrf kN kS kN\kS lncOrf orthOrf
+STRG.25.1 MALAT1 uc008gfj.2 Malat1 455430 90 90 0.04 0.02 2.286 ATGTTTCGTTTGCCTCAGACAGGTATCTCTTCGTTATCAGAAGAGTTGCTTCATTTCATCTGGGAGCAGAAAACAGCAGGCAGCTGTTAA ATGTTTCGTTTGCCTCAGACAGGTTTCTCTTCATAAGCAGAAGAGTTGCTTCATTCCATCTCGGAGCAGGAAACAGCAGACTGCTGTTGA
+STRG.25.1 MALAT1 cast.naive_gene.v2.1_1455.0_chr19 Malat1 455430 90 90 0.04 0.02 2.286 ATGTTTCGTTTGCCTCAGACAGGTATCTCTTCGTTATCAGAAGAGTTGCTTCATTTCATCTGGGAGCAGAAAACAGCAGGCAGCTGTTAA ATGTTTCGTTTGCCTCAGACAGGTTTCTCTTCATAAGCAGAAGAGTTGCTTCATTCCATCTCGGAGCAGGAAACAGCAGACTGCTGTTGA
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.orthologs.top.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.orthologs.top.txt Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,9 @@
+#lnc lncGeneSymbol ortholog orthologGeneSymbol alignScore exonID locusID indelRate(exon) indelRate(intron) lncExonsAligned orthExonsAligned spliceConserved spliceTotal category(hg19) category(mm9)
+STRG.25.1 MALAT1 uc008gfj.2 Malat1 455430 0.63 0.63 0.049 NA 1, 1, 0.0 0 intergenic intergenic
+STRG.5.1 Unannotated uc009vfa.1 Atpase6 281595 0.00 0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.52.1 JA760615 uc012hdm.1 BC071253 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.48.2 OK/SW-cl.16 uc009vez.1 Cox2 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.44.1 TVAS5 uc009vew.1 AK018753 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.49.1 OK/SW-cl.16 uc009vfa.1 Atpase6 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.45.1 TVAS5 uc009vew.1 AK018753 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.51.1 MTND5 uc009vfc.1 Cytb 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.orthologs.top.txt.sorted
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.orthologs.top.txt.sorted Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,9 @@
+#lnc lncGeneSymbol ortholog orthologGeneSymbol alignScore exonID locusID indelRate(exon) indelRate(intron) lncExonsAligned orthExonsAligned spliceConserved spliceTotal category(hg19) category(mm9)
+STRG.25.1 MALAT1 uc008gfj.2 Malat1 455430 0.63 0.63 0.049 NA 1, 1, 0.0 0 intergenic intergenic
+STRG.44.1 TVAS5 uc009vew.1 AK018753 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.45.1 TVAS5 uc009vew.1 AK018753 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.48.2 OK/SW-cl.16 uc009vez.1 Cox2 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.49.1 OK/SW-cl.16 uc009vfa.1 Atpase6 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.51.1 MTND5 uc009vfc.1 Cytb 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.5.1 Unannotated uc009vfa.1 Atpase6 281595 0.00 0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.52.1 JA760615 uc012hdm.1 BC071253 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.orthologs.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.orthologs.txt Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,10 @@
+#lnc lncGeneSymbol ortholog orthologGeneSymbol alignScore exonID locusID indelRate(exon) indelRate(intron) lncExonsAligned orthExonsAligned spliceConserved spliceTotal category(hg19) category(mm9)
+STRG.52.1 JA760615 uc012hdm.1 BC071253 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.51.1 MTND5 uc009vfc.1 Cytb 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.5.1 Unannotated uc009vfa.1 Atpase6 281595 0.00 0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.49.1 OK/SW-cl.16 uc009vfa.1 Atpase6 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.48.2 OK/SW-cl.16 uc009vez.1 Cox2 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.45.1 TVAS5 uc009vew.1 AK018753 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.44.1 TVAS5 uc009vew.1 AK018753 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.25.1 MALAT1 uc008gfj.2 Malat1 455430 0.63 0.63 0.049 NA 1, 1, 0.0 0 intergenic intergenic
+STRG.25.1 MALAT1 cast.naive_gene.v2.1_1455.0_chr19 Malat1 455430 0.60 0.64 0.048 NA 1, 1,2, 0.0 0 intergenic intergenic
b
diff -r 000000000000 -r a940c4a36a43 test-data/slncky/reads.simPE.orthologs.txt.sorted
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/slncky/reads.simPE.orthologs.txt.sorted Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,10 @@
+#lnc lncGeneSymbol ortholog orthologGeneSymbol alignScore exonID locusID indelRate(exon) indelRate(intron) lncExonsAligned orthExonsAligned spliceConserved spliceTotal category(hg19) category(mm9)
+STRG.25.1 MALAT1 cast.naive_gene.v2.1_1455.0_chr19 Malat1 455430 0.60 0.64 0.048 NA 1, 1,2, 0.0 0 intergenic intergenic
+STRG.25.1 MALAT1 uc008gfj.2 Malat1 455430 0.63 0.63 0.049 NA 1, 1, 0.0 0 intergenic intergenic
+STRG.44.1 TVAS5 uc009vew.1 AK018753 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.45.1 TVAS5 uc009vew.1 AK018753 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.48.2 OK/SW-cl.16 uc009vez.1 Cox2 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.49.1 OK/SW-cl.16 uc009vfa.1 Atpase6 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.51.1 MTND5 uc009vfc.1 Cytb 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.5.1 Unannotated uc009vfa.1 Atpase6 281595 0.00 0.00 NA NA NA NA 0.0 0 intergenic intergenic
+STRG.52.1 JA760615 uc012hdm.1 BC071253 726896 0.00 -0.00 NA NA NA NA 0.0 0 intergenic intergenic
b
diff -r 000000000000 -r a940c4a36a43 tool-data/ctat_lncrna_annotations.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ctat_lncrna_annotations.loc.sample Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,15 @@
+# This file lists the locations of CTAT lncrna annotations
+# Usually there will only be one index, but it is conceivable
+# that there could be multiple annotations.
+# This file format is as follows
+# (white space characters are TAB characters):
+#
+#<value>    <name>  <path>
+# value is a unique id
+# name is the display name
+# path is the directory where the index files are stored
+#
+#ctat_lncrna_annotations.loc could look like:
+#
+#slncky_annotations CTAT_lncrna_annotations /path/to/lncrna/annotations
+#
b
diff -r 000000000000 -r a940c4a36a43 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Jul 17 11:49:16 2018 -0400
b
@@ -0,0 +1,14 @@
+<tables>
+    <table name="ctat_genome_resource_libs" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_genome_resource_libs.loc" />
+    </table>
+    <table name="ctat_centrifuge_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_centrifuge_indexes.loc" />
+    </table>
+    <table name="ctat_lncrna_annotations" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_lncrna_annotations.loc" />
+    </table>
+</tables>