Repository 'chimerascan'
hg clone https://toolshed.g2.bx.psu.edu/repos/bioitcore/chimerascan

Changeset 4:713d8c903d0d (2017-09-13)
Previous changeset 3:27d8ed014662 (2017-09-12) Next changeset 5:c6b0f5c3f3b4 (2017-09-13)
Commit message:
planemo upload commit 93e677982c3636da455de2f827a87e516c7985ac-dirty
modified:
chimerascan.xml
added:
chimerascan/chimerascan_run.py
b
diff -r 27d8ed014662 -r 713d8c903d0d chimerascan.xml
--- a/chimerascan.xml Tue Sep 12 14:13:20 2017 -0400
+++ b/chimerascan.xml Wed Sep 13 14:42:12 2017 -0400
[
@@ -1,8 +1,8 @@
 <tool id="chimerascan" name="ChimeraScan">
     <description>A tool for identifying chimeric transcription in sequencing data.</description>
     <command detect_errors="exit_code"><![CDATA[
-            bash $__tool_directory__/run.sh &&
-            python $__tool_directory__/chimerascan_run.py -p 8 $__tool_directory__/myindex
+            bash $__tool_directory__/run.sh $__tool_directory__ &&
+            python chimerascan_run.py -p 8 myindex
             #if $input_type_conditional.chimerascan_input_type == "paired"
                     $input_type_conditional.input_1 $input_type_conditional.input_2
             #else
b
diff -r 27d8ed014662 -r 713d8c903d0d chimerascan/chimerascan_run.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chimerascan/chimerascan_run.py Wed Sep 13 14:42:12 2017 -0400
[
b'@@ -0,0 +1,1032 @@\n+#!/usr/bin/env python\n+\'\'\'\n+Created on Jan 5, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program.  If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+from chimerascan import __version__\n+\n+__author__ = "Matthew Iyer"\n+__copyright__ = "Copyright 2011, chimerascan project"\n+__credits__ = ["Matthew Iyer", "Christopher Maher"]\n+__license__ = "GPL"\n+__maintainer__ = "Matthew Iyer"\n+__email__ = "mkiyer@med.umich.edu"\n+__status__ = "beta"\n+\n+###\n+#\n+# Modified by \n+#\tBaekdoo Kim(baegi7942@gmail.com)\n+#\n+###\n+\n+import logging\n+import os\n+import subprocess\n+import sys\n+import shutil\n+from optparse import OptionParser, OptionGroup\n+import xml.etree.ElementTree as etree\n+\n+# check for python version 2.6.0 or greater\n+if sys.version_info < (2,6,0):\n+    sys.stderr.write("You need python 2.6 or later to run chimerascan\\n")\n+    sys.exit(1)\n+\n+# local imports\n+from chimerascan import pysam\n+import chimerascan.lib.config as config\n+from chimerascan.lib.config import JOB_SUCCESS, JOB_ERROR, MIN_SEGMENT_LENGTH\n+from chimerascan.lib.base import LibraryTypes, check_executable, \\\n+    parse_bool, indent_xml, up_to_date\n+from chimerascan.lib.seq import FASTQ_QUAL_FORMATS, SANGER_FORMAT\n+from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution\n+\n+from chimerascan.pipeline.fastq_inspect_reads import inspect_reads, detect_read_length, get_min_max_read_lengths\n+from chimerascan.pipeline.align_bowtie import align_pe, align_sr, trim_align_pe_sr\n+from chimerascan.pipeline.find_discordant_reads import find_discordant_fragments\n+from chimerascan.pipeline.discordant_reads_to_bedpe import discordant_reads_to_bedpe, sort_bedpe\n+from chimerascan.pipeline.nominate_chimeras import nominate_chimeras\n+from chimerascan.pipeline.chimeras_to_breakpoints import chimeras_to_breakpoints\n+from chimerascan.pipeline.nominate_spanning_reads import nominate_encomp_spanning_reads, extract_single_mapped_reads, nominate_single_mapped_spanning_reads\n+from chimerascan.pipeline.merge_spanning_alignments import merge_spanning_alignments\n+from chimerascan.pipeline.resolve_discordant_reads import resolve_discordant_reads\n+from chimerascan.pipeline.filter_chimeras import filter_chimeras, filter_highest_coverage_isoforms, filter_encompassing_chimeras\n+from chimerascan.pipeline.filter_homologous_genes import filter_homologous_genes\n+from chimerascan.pipeline.write_output import write_output\n+\n+# defaults for bowtie\n+DEFAULT_NUM_PROCESSORS = config.BASE_PROCESSORS\n+DEFAULT_BOWTIE_PATH = ""\n+DEFAULT_BOWTIE_ARGS = "--best --strata"\n+DEFAULT_DISCORD_BOWTIE_ARGS = "--best"\n+DEFAULT_MULTIHITS = 100\n+DEFAULT_MISMATCHES = 2\n+DEFAULT_DISCORD_MISMATCHES = 3\n+DEFAULT_SEGMENT_LENGTH = 25\n+DEFAULT_TRIM5 = 0\n+DEFAULT_TRIM3 = 0\n+DEFAULT_MIN_FRAG_LENGTH = 0\n+DEFAULT_MAX_FRAG_LENGTH = 1000\n+DEFAULT_NUM_SAMPLES_TO_DETERMINE_READ_LENGTHS = 10000\n+DEFAULT_FASTQ_QUAL_FORMAT = SANGER_FORMAT\n+DEFAULT_LIBRARY_TYPE = LibraryTypes.FR_UNSTRANDED\n+\n+DEFAULT_ISIZE_MEAN = 200\n+DEFAULT_ISIZE_STDEV = 40\n+DEFAULT_HOMOLOGY_MISMATCHES = config.BREAKPOINT_HOMOLOGY_MISMATCHES\n+DEFAULT_ANCHOR_MIN = 4\n+DEFAULT_ANCHOR_LENGTH = 8\n+DEFAULT_ANCHOR_MISMATCHES = 0\n+DEFAULT_FILTER_ISIZE_PROB = 0.01\n+DEFAULT_FILTER_UNIQUE_FRAGS = 2.0\n+DEFAULT_FILTER_ISOFORM_FRACTION = 0.01\n+NUM_POSITIONAL_ARGS = 4\n+DEFAULT_KEEP_TMP = True\n+\n+class RunConfig(object):\n+\n+    '..b'E)\n+    msg = "Filtering chimeras"\n+    if up_to_date(filtered_chimera_file, resolved_spanning_chimera_file):\n+        logging.info("[SKIPPED] %s" % (msg))\n+    else:\n+        logging.info(msg)\n+        # get insert size at prob\n+        filter_chimeras(input_file=resolved_spanning_chimera_file,\n+                        output_file=filtered_chimera_file,\n+                        index_dir=runconfig.index_dir,\n+                        bam_file=sorted_aligned_bam_file,\n+                        unique_frags=runconfig.filter_unique_frags,\n+                        isoform_fraction=runconfig.filter_isoform_fraction,\n+                        false_pos_file=runconfig.filter_false_pos_file)\n+    #\n+    # Filter homologous genes\n+    #\n+    homolog_filtered_chimera_file = os.path.join(tmp_dir, config.HOMOLOG_FILTERED_CHIMERA_FILE)\n+    msg = "Filtering homologous chimeras"\n+    if up_to_date(homolog_filtered_chimera_file, filtered_chimera_file):\n+        logging.info("[SKIPPED] %s" % (msg))\n+    else:\n+        logging.info(msg)\n+        min_isize = isize_dist.isize_at_percentile(1.0)\n+        max_isize = isize_dist.isize_at_percentile(99.0)\n+        filter_homologous_genes(input_file=filtered_chimera_file,\n+                                output_file=homolog_filtered_chimera_file,\n+                                index_dir=runconfig.index_dir,\n+                                homolog_segment_length=runconfig.segment_length-1,\n+                                min_isize=min_isize,\n+                                max_isize=max_isize,\n+                                bowtie_bin=bowtie_bin,\n+                                num_processors=runconfig.num_processors,\n+                                tmp_dir=tmp_dir)\n+    #\n+    # Choose best isoform for chimeras that share the same breakpoint\n+    #\n+    best_isoform_chimera_file = os.path.join(tmp_dir, config.BEST_FILTERED_CHIMERA_FILE)\n+    msg = "Choosing best isoform for each chimera"\n+    if up_to_date(best_isoform_chimera_file, homolog_filtered_chimera_file):\n+        logging.info("[SKIPPED] %s" % (msg))\n+    else:\n+        logging.info(msg)\n+        retcode = filter_highest_coverage_isoforms(index_dir=runconfig.index_dir,\n+                                                   input_file=homolog_filtered_chimera_file,\n+                                                   output_file=best_isoform_chimera_file)\n+    #\n+    # Write user-friendly output file\n+    #\n+    chimera_output_file = os.path.join(runconfig.output_dir, config.CHIMERA_OUTPUT_FILE)\n+    #msg = "Writing chimeras to file %s" % (chimera_output_file)\n+    if up_to_date(chimera_output_file, best_isoform_chimera_file):\n+        logging.info("[SKIPPED] %s" % (msg))\n+    else:\n+        logging.info(msg)\n+        write_output(best_isoform_chimera_file,\n+                     bam_file=sorted_aligned_bam_file,\n+                     output_file=chimera_output_file,\n+                     index_dir=runconfig.index_dir)\n+    \n+    #\n+    # Move output to Galaxy data file\n+    #\n+    cmd = "mv %s/chimerascan_tmp/chimeras.bedpe %s/%s" % (os.path.dirname(runconfig.output_file_path), os.path.dirname(runconfig.output_file_path), runconfig.output_file_name)\n+    p = subprocess.check_output(cmd.split())\n+\n+    #\n+    # Cleanup\n+    #\n+    if not runconfig.keep_tmp:\n+        logging.info("Cleaning up temporary files")\n+        shutil.rmtree(tmp_dir)\n+    cmd_rm = "rm -r %s/chimerascan_tmp" % os.path.dirname(runconfig.output_file_path)\n+    p = subprocess.check_output(cmd_rm.split())\n+\n+    #\n+    # Done\n+    #\n+    logging.info("Finished run.")\n+    return JOB_SUCCESS\n+\n+\n+def main():\n+    logging.basicConfig(level=logging.INFO,\n+                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+    # parse run parameters in config file and command line\n+    runconfig = RunConfig()\n+    runconfig.from_args(sys.argv[1:])\n+    # run chimerascan\n+    sys.exit(run_chimerascan(runconfig))\n+\n+if __name__ == \'__main__\':\n+    main()\n+\n'