Previous changeset 3:27d8ed014662 (2017-09-12) Next changeset 5:c6b0f5c3f3b4 (2017-09-13) |
Commit message:
planemo upload commit 93e677982c3636da455de2f827a87e516c7985ac-dirty |
modified:
chimerascan.xml |
added:
chimerascan/chimerascan_run.py |
b |
diff -r 27d8ed014662 -r 713d8c903d0d chimerascan.xml --- a/chimerascan.xml Tue Sep 12 14:13:20 2017 -0400 +++ b/chimerascan.xml Wed Sep 13 14:42:12 2017 -0400 |
[ |
@@ -1,8 +1,8 @@ <tool id="chimerascan" name="ChimeraScan"> <description>A tool for identifying chimeric transcription in sequencing data.</description> <command detect_errors="exit_code"><![CDATA[ - bash $__tool_directory__/run.sh && - python $__tool_directory__/chimerascan_run.py -p 8 $__tool_directory__/myindex + bash $__tool_directory__/run.sh $__tool_directory__ && + python chimerascan_run.py -p 8 myindex #if $input_type_conditional.chimerascan_input_type == "paired" $input_type_conditional.input_1 $input_type_conditional.input_2 #else |
b |
diff -r 27d8ed014662 -r 713d8c903d0d chimerascan/chimerascan_run.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan_run.py Wed Sep 13 14:42:12 2017 -0400 |
[ |
b'@@ -0,0 +1,1032 @@\n+#!/usr/bin/env python\n+\'\'\'\n+Created on Jan 5, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+from chimerascan import __version__\n+\n+__author__ = "Matthew Iyer"\n+__copyright__ = "Copyright 2011, chimerascan project"\n+__credits__ = ["Matthew Iyer", "Christopher Maher"]\n+__license__ = "GPL"\n+__maintainer__ = "Matthew Iyer"\n+__email__ = "mkiyer@med.umich.edu"\n+__status__ = "beta"\n+\n+###\n+#\n+# Modified by \n+#\tBaekdoo Kim(baegi7942@gmail.com)\n+#\n+###\n+\n+import logging\n+import os\n+import subprocess\n+import sys\n+import shutil\n+from optparse import OptionParser, OptionGroup\n+import xml.etree.ElementTree as etree\n+\n+# check for python version 2.6.0 or greater\n+if sys.version_info < (2,6,0):\n+ sys.stderr.write("You need python 2.6 or later to run chimerascan\\n")\n+ sys.exit(1)\n+\n+# local imports\n+from chimerascan import pysam\n+import chimerascan.lib.config as config\n+from chimerascan.lib.config import JOB_SUCCESS, JOB_ERROR, MIN_SEGMENT_LENGTH\n+from chimerascan.lib.base import LibraryTypes, check_executable, \\\n+ parse_bool, indent_xml, up_to_date\n+from chimerascan.lib.seq import FASTQ_QUAL_FORMATS, SANGER_FORMAT\n+from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution\n+\n+from chimerascan.pipeline.fastq_inspect_reads import inspect_reads, detect_read_length, get_min_max_read_lengths\n+from chimerascan.pipeline.align_bowtie import align_pe, align_sr, trim_align_pe_sr\n+from chimerascan.pipeline.find_discordant_reads import find_discordant_fragments\n+from chimerascan.pipeline.discordant_reads_to_bedpe import discordant_reads_to_bedpe, sort_bedpe\n+from chimerascan.pipeline.nominate_chimeras import nominate_chimeras\n+from chimerascan.pipeline.chimeras_to_breakpoints import chimeras_to_breakpoints\n+from chimerascan.pipeline.nominate_spanning_reads import nominate_encomp_spanning_reads, extract_single_mapped_reads, nominate_single_mapped_spanning_reads\n+from chimerascan.pipeline.merge_spanning_alignments import merge_spanning_alignments\n+from chimerascan.pipeline.resolve_discordant_reads import resolve_discordant_reads\n+from chimerascan.pipeline.filter_chimeras import filter_chimeras, filter_highest_coverage_isoforms, filter_encompassing_chimeras\n+from chimerascan.pipeline.filter_homologous_genes import filter_homologous_genes\n+from chimerascan.pipeline.write_output import write_output\n+\n+# defaults for bowtie\n+DEFAULT_NUM_PROCESSORS = config.BASE_PROCESSORS\n+DEFAULT_BOWTIE_PATH = ""\n+DEFAULT_BOWTIE_ARGS = "--best --strata"\n+DEFAULT_DISCORD_BOWTIE_ARGS = "--best"\n+DEFAULT_MULTIHITS = 100\n+DEFAULT_MISMATCHES = 2\n+DEFAULT_DISCORD_MISMATCHES = 3\n+DEFAULT_SEGMENT_LENGTH = 25\n+DEFAULT_TRIM5 = 0\n+DEFAULT_TRIM3 = 0\n+DEFAULT_MIN_FRAG_LENGTH = 0\n+DEFAULT_MAX_FRAG_LENGTH = 1000\n+DEFAULT_NUM_SAMPLES_TO_DETERMINE_READ_LENGTHS = 10000\n+DEFAULT_FASTQ_QUAL_FORMAT = SANGER_FORMAT\n+DEFAULT_LIBRARY_TYPE = LibraryTypes.FR_UNSTRANDED\n+\n+DEFAULT_ISIZE_MEAN = 200\n+DEFAULT_ISIZE_STDEV = 40\n+DEFAULT_HOMOLOGY_MISMATCHES = config.BREAKPOINT_HOMOLOGY_MISMATCHES\n+DEFAULT_ANCHOR_MIN = 4\n+DEFAULT_ANCHOR_LENGTH = 8\n+DEFAULT_ANCHOR_MISMATCHES = 0\n+DEFAULT_FILTER_ISIZE_PROB = 0.01\n+DEFAULT_FILTER_UNIQUE_FRAGS = 2.0\n+DEFAULT_FILTER_ISOFORM_FRACTION = 0.01\n+NUM_POSITIONAL_ARGS = 4\n+DEFAULT_KEEP_TMP = True\n+\n+class RunConfig(object):\n+\n+ '..b'E)\n+ msg = "Filtering chimeras"\n+ if up_to_date(filtered_chimera_file, resolved_spanning_chimera_file):\n+ logging.info("[SKIPPED] %s" % (msg))\n+ else:\n+ logging.info(msg)\n+ # get insert size at prob\n+ filter_chimeras(input_file=resolved_spanning_chimera_file,\n+ output_file=filtered_chimera_file,\n+ index_dir=runconfig.index_dir,\n+ bam_file=sorted_aligned_bam_file,\n+ unique_frags=runconfig.filter_unique_frags,\n+ isoform_fraction=runconfig.filter_isoform_fraction,\n+ false_pos_file=runconfig.filter_false_pos_file)\n+ #\n+ # Filter homologous genes\n+ #\n+ homolog_filtered_chimera_file = os.path.join(tmp_dir, config.HOMOLOG_FILTERED_CHIMERA_FILE)\n+ msg = "Filtering homologous chimeras"\n+ if up_to_date(homolog_filtered_chimera_file, filtered_chimera_file):\n+ logging.info("[SKIPPED] %s" % (msg))\n+ else:\n+ logging.info(msg)\n+ min_isize = isize_dist.isize_at_percentile(1.0)\n+ max_isize = isize_dist.isize_at_percentile(99.0)\n+ filter_homologous_genes(input_file=filtered_chimera_file,\n+ output_file=homolog_filtered_chimera_file,\n+ index_dir=runconfig.index_dir,\n+ homolog_segment_length=runconfig.segment_length-1,\n+ min_isize=min_isize,\n+ max_isize=max_isize,\n+ bowtie_bin=bowtie_bin,\n+ num_processors=runconfig.num_processors,\n+ tmp_dir=tmp_dir)\n+ #\n+ # Choose best isoform for chimeras that share the same breakpoint\n+ #\n+ best_isoform_chimera_file = os.path.join(tmp_dir, config.BEST_FILTERED_CHIMERA_FILE)\n+ msg = "Choosing best isoform for each chimera"\n+ if up_to_date(best_isoform_chimera_file, homolog_filtered_chimera_file):\n+ logging.info("[SKIPPED] %s" % (msg))\n+ else:\n+ logging.info(msg)\n+ retcode = filter_highest_coverage_isoforms(index_dir=runconfig.index_dir,\n+ input_file=homolog_filtered_chimera_file,\n+ output_file=best_isoform_chimera_file)\n+ #\n+ # Write user-friendly output file\n+ #\n+ chimera_output_file = os.path.join(runconfig.output_dir, config.CHIMERA_OUTPUT_FILE)\n+ #msg = "Writing chimeras to file %s" % (chimera_output_file)\n+ if up_to_date(chimera_output_file, best_isoform_chimera_file):\n+ logging.info("[SKIPPED] %s" % (msg))\n+ else:\n+ logging.info(msg)\n+ write_output(best_isoform_chimera_file,\n+ bam_file=sorted_aligned_bam_file,\n+ output_file=chimera_output_file,\n+ index_dir=runconfig.index_dir)\n+ \n+ #\n+ # Move output to Galaxy data file\n+ #\n+ cmd = "mv %s/chimerascan_tmp/chimeras.bedpe %s/%s" % (os.path.dirname(runconfig.output_file_path), os.path.dirname(runconfig.output_file_path), runconfig.output_file_name)\n+ p = subprocess.check_output(cmd.split())\n+\n+ #\n+ # Cleanup\n+ #\n+ if not runconfig.keep_tmp:\n+ logging.info("Cleaning up temporary files")\n+ shutil.rmtree(tmp_dir)\n+ cmd_rm = "rm -r %s/chimerascan_tmp" % os.path.dirname(runconfig.output_file_path)\n+ p = subprocess.check_output(cmd_rm.split())\n+\n+ #\n+ # Done\n+ #\n+ logging.info("Finished run.")\n+ return JOB_SUCCESS\n+\n+\n+def main():\n+ logging.basicConfig(level=logging.INFO,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+ # parse run parameters in config file and command line\n+ runconfig = RunConfig()\n+ runconfig.from_args(sys.argv[1:])\n+ # run chimerascan\n+ sys.exit(run_chimerascan(runconfig))\n+\n+if __name__ == \'__main__\':\n+ main()\n+\n' |