Mercurial > repos > weilong-guo > bs_seeker2
changeset 1:8b26adf64adc draft default tip
V2.0.5
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/COMMIT_EDITMSG Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +V2.0.5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/HEAD Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +ref: refs/heads/master
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/config Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,13 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true + ignorecase = true + precomposeunicode = false +[remote "origin"] + url = https://github.com/BSSeeker/BSseeker2.git + fetch = +refs/heads/*:refs/remotes/origin/* +[branch "master"] + remote = origin + merge = refs/heads/master
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/description Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +Unnamed repository; edit this file 'description' to name the repository.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/applypatch-msg.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,15 @@ +#!/bin/sh +# +# An example hook script to check the commit log message taken by +# applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. The hook is +# allowed to edit the commit message file. +# +# To enable this hook, rename this file to "applypatch-msg". + +. git-sh-setup +test -x "$GIT_DIR/hooks/commit-msg" && + exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"} +:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/commit-msg.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,24 @@ +#!/bin/sh +# +# An example hook script to check the commit log message. +# Called by "git commit" with one argument, the name of the file +# that has the commit message. The hook should exit with non-zero +# status after issuing an appropriate message if it wants to stop the +# commit. The hook is allowed to edit the commit message file. +# +# To enable this hook, rename this file to "commit-msg". + +# Uncomment the below to add a Signed-off-by line to the message. +# Doing this in a hook is a bad idea in general, but the prepare-commit-msg +# hook is more suited to it. +# +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" + +# This example catches duplicate Signed-off-by lines. + +test "" = "$(grep '^Signed-off-by: ' "$1" | + sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || { + echo >&2 Duplicate Signed-off-by lines. + exit 1 +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/post-update.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,8 @@ +#!/bin/sh +# +# An example hook script to prepare a packed repository for use over +# dumb transports. +# +# To enable this hook, rename this file to "post-update". + +exec git update-server-info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/pre-applypatch.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,14 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed +# by applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-applypatch". + +. git-sh-setup +test -x "$GIT_DIR/hooks/pre-commit" && + exec "$GIT_DIR/hooks/pre-commit" ${1+"$@"} +:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/pre-commit.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,50 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed. +# Called by "git commit" with no arguments. The hook should +# exit with non-zero status after issuing an appropriate message if +# it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-commit". + +if git rev-parse --verify HEAD >/dev/null 2>&1 +then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 +fi + +# If you want to allow non-ascii filenames set this variable to true. +allownonascii=$(git config hooks.allownonascii) + +# Redirect output to stderr. +exec 1>&2 + +# Cross platform projects tend to avoid non-ascii filenames; prevent +# them from being added to the repository. We exploit the fact that the +# printable range starts at the space character and ends with tilde. +if [ "$allownonascii" != "true" ] && + # Note that the use of brackets around a tr range is ok here, (it's + # even required, for portability to Solaris 10's /usr/bin/tr), since + # the square bracket bytes happen to fall in the designated range. + test $(git diff --cached --name-only --diff-filter=A -z $against | + LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 +then + echo "Error: Attempt to add a non-ascii file name." + echo + echo "This can cause problems if you want to work" + echo "with people on other platforms." + echo + echo "To be portable it is advisable to rename the file ..." + echo + echo "If you know what you are doing you can disable this" + echo "check using:" + echo + echo " git config hooks.allownonascii true" + echo + exit 1 +fi + +# If there are whitespace errors, print the offending file names and fail. +exec git diff-index --check --cached $against --
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/pre-push.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,53 @@ +#!/bin/sh + +# An example hook script to verify what is about to be pushed. Called by "git +# push" after it has checked the remote status, but before anything has been +# pushed. If this script exits with a non-zero status nothing will be pushed. +# +# This hook is called with the following parameters: +# +# $1 -- Name of the remote to which the push is being done +# $2 -- URL to which the push is being done +# +# If pushing without using a named remote those arguments will be equal. +# +# Information about the commits which are being pushed is supplied as lines to +# the standard input in the form: +# +# <local ref> <local sha1> <remote ref> <remote sha1> +# +# This sample shows how to prevent push of commits where the log message starts +# with "WIP" (work in progress). + +remote="$1" +url="$2" + +z40=0000000000000000000000000000000000000000 + +IFS=' ' +while read local_ref local_sha remote_ref remote_sha +do + if [ "$local_sha" = $z40 ] + then + # Handle delete + else + if [ "$remote_sha" = $z40 ] + then + # New branch, examine all commits + range="$local_sha" + else + # Update to existing branch, examine new commits + range="$remote_sha..$local_sha" + fi + + # Check for WIP commit + commit=`git rev-list -n 1 --grep '^WIP' "$range"` + if [ -n "$commit" ] + then + echo "Found WIP commit in $local_ref, not pushing" + exit 1 + fi + fi +done + +exit 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/pre-rebase.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,169 @@ +#!/bin/sh +# +# Copyright (c) 2006, 2008 Junio C Hamano +# +# The "pre-rebase" hook is run just before "git rebase" starts doing +# its job, and can prevent the command from running by exiting with +# non-zero status. +# +# The hook is called with the following parameters: +# +# $1 -- the upstream the series was forked from. +# $2 -- the branch being rebased (or empty when rebasing the current branch). +# +# This sample shows how to prevent topic branches that are already +# merged to 'next' branch from getting rebased, because allowing it +# would result in rebasing already published history. + +publish=next +basebranch="$1" +if test "$#" = 2 +then + topic="refs/heads/$2" +else + topic=`git symbolic-ref HEAD` || + exit 0 ;# we do not interrupt rebasing detached HEAD +fi + +case "$topic" in +refs/heads/??/*) + ;; +*) + exit 0 ;# we do not interrupt others. + ;; +esac + +# Now we are dealing with a topic branch being rebased +# on top of master. Is it OK to rebase it? + +# Does the topic really exist? +git show-ref -q "$topic" || { + echo >&2 "No such branch $topic" + exit 1 +} + +# Is topic fully merged to master? +not_in_master=`git rev-list --pretty=oneline ^master "$topic"` +if test -z "$not_in_master" +then + echo >&2 "$topic is fully merged to master; better remove it." + exit 1 ;# we could allow it, but there is no point. +fi + +# Is topic ever merged to next? If so you should not be rebasing it. +only_next_1=`git rev-list ^master "^$topic" ${publish} | sort` +only_next_2=`git rev-list ^master ${publish} | sort` +if test "$only_next_1" = "$only_next_2" +then + not_in_topic=`git rev-list "^$topic" master` + if test -z "$not_in_topic" + then + echo >&2 "$topic is already up-to-date with master" + exit 1 ;# we could allow it, but there is no point. + else + exit 0 + fi +else + not_in_next=`git rev-list --pretty=oneline ^${publish} "$topic"` + /usr/bin/perl -e ' + my $topic = $ARGV[0]; + my $msg = "* $topic has commits already merged to public branch:\n"; + my (%not_in_next) = map { + /^([0-9a-f]+) /; + ($1 => 1); + } split(/\n/, $ARGV[1]); + for my $elem (map { + /^([0-9a-f]+) (.*)$/; + [$1 => $2]; + } split(/\n/, $ARGV[2])) { + if (!exists $not_in_next{$elem->[0]}) { + if ($msg) { + print STDERR $msg; + undef $msg; + } + print STDERR " $elem->[1]\n"; + } + } + ' "$topic" "$not_in_next" "$not_in_master" + exit 1 +fi + +exit 0 + +################################################################ + +This sample hook safeguards topic branches that have been +published from being rewound. + +The workflow assumed here is: + + * Once a topic branch forks from "master", "master" is never + merged into it again (either directly or indirectly). + + * Once a topic branch is fully cooked and merged into "master", + it is deleted. If you need to build on top of it to correct + earlier mistakes, a new topic branch is created by forking at + the tip of the "master". This is not strictly necessary, but + it makes it easier to keep your history simple. + + * Whenever you need to test or publish your changes to topic + branches, merge them into "next" branch. + +The script, being an example, hardcodes the publish branch name +to be "next", but it is trivial to make it configurable via +$GIT_DIR/config mechanism. + +With this workflow, you would want to know: + +(1) ... if a topic branch has ever been merged to "next". Young + topic branches can have stupid mistakes you would rather + clean up before publishing, and things that have not been + merged into other branches can be easily rebased without + affecting other people. But once it is published, you would + not want to rewind it. + +(2) ... if a topic branch has been fully merged to "master". + Then you can delete it. More importantly, you should not + build on top of it -- other people may already want to + change things related to the topic as patches against your + "master", so if you need further changes, it is better to + fork the topic (perhaps with the same name) afresh from the + tip of "master". + +Let's look at this example: + + o---o---o---o---o---o---o---o---o---o "next" + / / / / + / a---a---b A / / + / / / / + / / c---c---c---c B / + / / / \ / + / / / b---b C \ / + / / / / \ / + ---o---o---o---o---o---o---o---o---o---o---o "master" + + +A, B and C are topic branches. + + * A has one fix since it was merged up to "next". + + * B has finished. It has been fully merged up to "master" and "next", + and is ready to be deleted. + + * C has not merged to "next" at all. + +We would want to allow C to be rebased, refuse A, and encourage +B to be deleted. + +To compute (1): + + git rev-list ^master ^topic next + git rev-list ^master next + + if these match, topic has not merged in next at all. + +To compute (2): + + git rev-list master..topic + + if this is empty, it is fully merged to "master".
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/prepare-commit-msg.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,36 @@ +#!/bin/sh +# +# An example hook script to prepare the commit log message. +# Called by "git commit" with the name of the file that has the +# commit message, followed by the description of the commit +# message's source. The hook's purpose is to edit the commit +# message file. If the hook fails with a non-zero status, +# the commit is aborted. +# +# To enable this hook, rename this file to "prepare-commit-msg". + +# This hook includes three examples. The first comments out the +# "Conflicts:" part of a merge commit. +# +# The second includes the output of "git diff --name-status -r" +# into the message, just before the "git status" output. It is +# commented because it doesn't cope with --amend or with squashed +# commits. +# +# The third example adds a Signed-off-by line to the message, that can +# still be edited. This is rarely a good idea. + +case "$2,$3" in + merge,) + /usr/bin/perl -i.bak -ne 's/^/# /, s/^# #/#/ if /^Conflicts/ .. /#/; print' "$1" ;; + +# ,|template,) +# /usr/bin/perl -i.bak -pe ' +# print "\n" . `git diff --cached --name-status -r` +# if /^#/ && $first++ == 0' "$1" ;; + + *) ;; +esac + +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/update.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,128 @@ +#!/bin/sh +# +# An example hook script to blocks unannotated tags from entering. +# Called by "git receive-pack" with arguments: refname sha1-old sha1-new +# +# To enable this hook, rename this file to "update". +# +# Config +# ------ +# hooks.allowunannotated +# This boolean sets whether unannotated tags will be allowed into the +# repository. By default they won't be. +# hooks.allowdeletetag +# This boolean sets whether deleting tags will be allowed in the +# repository. By default they won't be. +# hooks.allowmodifytag +# This boolean sets whether a tag may be modified after creation. By default +# it won't be. +# hooks.allowdeletebranch +# This boolean sets whether deleting branches will be allowed in the +# repository. By default they won't be. +# hooks.denycreatebranch +# This boolean sets whether remotely creating branches will be denied +# in the repository. By default this is allowed. +# + +# --- Command line +refname="$1" +oldrev="$2" +newrev="$3" + +# --- Safety check +if [ -z "$GIT_DIR" ]; then + echo "Don't run this script from the command line." >&2 + echo " (if you want, you could supply GIT_DIR then run" >&2 + echo " $0 <ref> <oldrev> <newrev>)" >&2 + exit 1 +fi + +if [ -z "$refname" -o -z "$oldrev" -o -z "$newrev" ]; then + echo "Usage: $0 <ref> <oldrev> <newrev>" >&2 + exit 1 +fi + +# --- Config +allowunannotated=$(git config --bool hooks.allowunannotated) +allowdeletebranch=$(git config --bool hooks.allowdeletebranch) +denycreatebranch=$(git config --bool hooks.denycreatebranch) +allowdeletetag=$(git config --bool hooks.allowdeletetag) +allowmodifytag=$(git config --bool hooks.allowmodifytag) + +# check for no description +projectdesc=$(sed -e '1q' "$GIT_DIR/description") +case "$projectdesc" in +"Unnamed repository"* | "") + echo "*** Project description file hasn't been set" >&2 + exit 1 + ;; +esac + +# --- Check types +# if $newrev is 0000...0000, it's a commit to delete a ref. +zero="0000000000000000000000000000000000000000" +if [ "$newrev" = "$zero" ]; then + newrev_type=delete +else + newrev_type=$(git cat-file -t $newrev) +fi + +case "$refname","$newrev_type" in + refs/tags/*,commit) + # un-annotated tag + short_refname=${refname##refs/tags/} + if [ "$allowunannotated" != "true" ]; then + echo "*** The un-annotated tag, $short_refname, is not allowed in this repository" >&2 + echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." >&2 + exit 1 + fi + ;; + refs/tags/*,delete) + # delete tag + if [ "$allowdeletetag" != "true" ]; then + echo "*** Deleting a tag is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/tags/*,tag) + # annotated tag + if [ "$allowmodifytag" != "true" ] && git rev-parse $refname > /dev/null 2>&1 + then + echo "*** Tag '$refname' already exists." >&2 + echo "*** Modifying a tag is not allowed in this repository." >&2 + exit 1 + fi + ;; + refs/heads/*,commit) + # branch + if [ "$oldrev" = "$zero" -a "$denycreatebranch" = "true" ]; then + echo "*** Creating a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/heads/*,delete) + # delete branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/remotes/*,commit) + # tracking branch + ;; + refs/remotes/*,delete) + # delete tracking branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a tracking branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + *) + # Anything else (is there anything else?) + echo "*** Update hook: unknown type of update to ref $refname of type $newrev_type" >&2 + exit 1 + ;; +esac + +# --- Finished +exit 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/info/exclude Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,7 @@ +# git ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. +# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ +.DS_Store
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/logs/HEAD Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,17 @@ +0000000000000000000000000000000000000000 6d252c627030a29eff9cc938f74c0be27ec853f0 Weilong Guo <weilong@Weilongs-MacBook-Pro.local> 1365490332 -0700 clone: from https://github.com/BSSeeker/BSseeker2.git +6d252c627030a29eff9cc938f74c0be27ec853f0 565f548bc9a4ffcad63323481e8b03cb78a84b0a weilong <guoweilong@gmail.com> 1365495166 -0700 commit: update by Weilong +565f548bc9a4ffcad63323481e8b03cb78a84b0a 1373f3c499ff4b6fb8ddff39e8859fa5326d0098 weilong <guoweilong@gmail.com> 1366217039 -0700 commit: Add version track; +1373f3c499ff4b6fb8ddff39e8859fa5326d0098 ffe723a1f313914590bb10496b6d418d8a2ea614 weilong <guoweilong@gmail.com> 1366521472 -0700 commit: RRBS un-directional +ffe723a1f313914590bb10496b6d418d8a2ea614 203e3ea9ba50c325d53e45f6d31686b6b23bba41 weilong <guoweilong@gmail.com> 1368857623 -0700 commit: Multiple-sites updates +203e3ea9ba50c325d53e45f6d31686b6b23bba41 6aeb6fc0a2180cd14c59d772e05c4c3139f37f2e weilong <guoweilong@gmail.com> 1368872339 -0700 commit: Clean-up/Galaxy/Test_data +6aeb6fc0a2180cd14c59d772e05c4c3139f37f2e 4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 weilong <guoweilong@gmail.com> 1368872394 -0700 commit: Clean-up/galaxy/test_data +4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 3544b8bb1e5666d100f7652fc63b985187990e49 weilong <guoweilong@gmail.com> 1369030442 -0700 commit: Fix bug on utils.py; Increased version; Add UPDATE +3544b8bb1e5666d100f7652fc63b985187990e49 39508bee8506ce06feead7599a53ffd09cc89198 weilong <guoweilong@gmail.com> 1369355033 -0700 commit: support diff indexes +39508bee8506ce06feead7599a53ffd09cc89198 99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed weilong <guoweilong@gmail.com> 1369785459 -0700 commit: Update Readme +99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed 823880bb2ebba5ae6bd1c7de7dfc4ce03935155b weilong <guoweilong@gmail.com> 1369876356 -0700 commit: fix bug in galaxy; chmod +x *.py +823880bb2ebba5ae6bd1c7de7dfc4ce03935155b 7664a927ba6f12b023f6cd72dc407e616c5b85e5 weilong <guoweilong@gmail.com> 1370044685 -0700 commit: fix galaxy xml +7664a927ba6f12b023f6cd72dc407e616c5b85e5 3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 weilong <guoweilong@gmail.com> 1371202921 -0700 commit: fix bugs for RRBS tag +3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 58de749ac6ebe90f32db2c89cdd72507669695e2 weilong <guoweilong@gmail.com> 1371366774 -0700 commit: fix bugs for RRBS tag +58de749ac6ebe90f32db2c89cdd72507669695e2 5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 weilong <guoweilong@gmail.com> 1375139287 -0700 commit: v2.0.4 +5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 0c877f0900535fbee6909dc1591e8ad0eac29c52 weilong <guoweilong@gmail.com> 1375307754 -0700 commit: fix a bug +0c877f0900535fbee6909dc1591e8ad0eac29c52 85a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8 weilong <guoweilong@gmail.com> 1383633776 +0800 commit: V2.0.5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/logs/refs/heads/master Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,17 @@ +0000000000000000000000000000000000000000 6d252c627030a29eff9cc938f74c0be27ec853f0 Weilong Guo <weilong@Weilongs-MacBook-Pro.local> 1365490332 -0700 clone: from https://github.com/BSSeeker/BSseeker2.git +6d252c627030a29eff9cc938f74c0be27ec853f0 565f548bc9a4ffcad63323481e8b03cb78a84b0a weilong <guoweilong@gmail.com> 1365495166 -0700 commit: update by Weilong +565f548bc9a4ffcad63323481e8b03cb78a84b0a 1373f3c499ff4b6fb8ddff39e8859fa5326d0098 weilong <guoweilong@gmail.com> 1366217039 -0700 commit: Add version track; +1373f3c499ff4b6fb8ddff39e8859fa5326d0098 ffe723a1f313914590bb10496b6d418d8a2ea614 weilong <guoweilong@gmail.com> 1366521472 -0700 commit: RRBS un-directional +ffe723a1f313914590bb10496b6d418d8a2ea614 203e3ea9ba50c325d53e45f6d31686b6b23bba41 weilong <guoweilong@gmail.com> 1368857623 -0700 commit: Multiple-sites updates +203e3ea9ba50c325d53e45f6d31686b6b23bba41 6aeb6fc0a2180cd14c59d772e05c4c3139f37f2e weilong <guoweilong@gmail.com> 1368872339 -0700 commit: Clean-up/Galaxy/Test_data +6aeb6fc0a2180cd14c59d772e05c4c3139f37f2e 4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 weilong <guoweilong@gmail.com> 1368872394 -0700 commit: Clean-up/galaxy/test_data +4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 3544b8bb1e5666d100f7652fc63b985187990e49 weilong <guoweilong@gmail.com> 1369030442 -0700 commit: Fix bug on utils.py; Increased version; Add UPDATE +3544b8bb1e5666d100f7652fc63b985187990e49 39508bee8506ce06feead7599a53ffd09cc89198 weilong <guoweilong@gmail.com> 1369355033 -0700 commit: support diff indexes +39508bee8506ce06feead7599a53ffd09cc89198 99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed weilong <guoweilong@gmail.com> 1369785459 -0700 commit: Update Readme +99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed 823880bb2ebba5ae6bd1c7de7dfc4ce03935155b weilong <guoweilong@gmail.com> 1369876356 -0700 commit: fix bug in galaxy; chmod +x *.py +823880bb2ebba5ae6bd1c7de7dfc4ce03935155b 7664a927ba6f12b023f6cd72dc407e616c5b85e5 weilong <guoweilong@gmail.com> 1370044685 -0700 commit: fix galaxy xml +7664a927ba6f12b023f6cd72dc407e616c5b85e5 3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 weilong <guoweilong@gmail.com> 1371202921 -0700 commit: fix bugs for RRBS tag +3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 58de749ac6ebe90f32db2c89cdd72507669695e2 weilong <guoweilong@gmail.com> 1371366774 -0700 commit: fix bugs for RRBS tag +58de749ac6ebe90f32db2c89cdd72507669695e2 5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 weilong <guoweilong@gmail.com> 1375139287 -0700 commit: v2.0.4 +5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 0c877f0900535fbee6909dc1591e8ad0eac29c52 weilong <guoweilong@gmail.com> 1375307754 -0700 commit: fix a bug +0c877f0900535fbee6909dc1591e8ad0eac29c52 85a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8 weilong <guoweilong@gmail.com> 1383633776 +0800 commit: V2.0.5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/logs/refs/remotes/origin/HEAD Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +0000000000000000000000000000000000000000 6d252c627030a29eff9cc938f74c0be27ec853f0 Weilong Guo <weilong@Weilongs-MacBook-Pro.local> 1365490332 -0700 clone: from https://github.com/BSSeeker/BSseeker2.git
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/logs/refs/remotes/origin/master Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,15 @@ +6d252c627030a29eff9cc938f74c0be27ec853f0 565f548bc9a4ffcad63323481e8b03cb78a84b0a weilong <guoweilong@gmail.com> 1365495210 -0700 push: fast forward +565f548bc9a4ffcad63323481e8b03cb78a84b0a 1373f3c499ff4b6fb8ddff39e8859fa5326d0098 weilong <guoweilong@gmail.com> 1366217075 -0700 push: fast forward +1373f3c499ff4b6fb8ddff39e8859fa5326d0098 ffe723a1f313914590bb10496b6d418d8a2ea614 weilong <guoweilong@gmail.com> 1366521489 -0700 update by push +ffe723a1f313914590bb10496b6d418d8a2ea614 203e3ea9ba50c325d53e45f6d31686b6b23bba41 weilong <guoweilong@gmail.com> 1368857637 -0700 update by push +203e3ea9ba50c325d53e45f6d31686b6b23bba41 4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 weilong <guoweilong@gmail.com> 1368872449 -0700 update by push +4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 3544b8bb1e5666d100f7652fc63b985187990e49 weilong <guoweilong@gmail.com> 1369030453 -0700 update by push +3544b8bb1e5666d100f7652fc63b985187990e49 39508bee8506ce06feead7599a53ffd09cc89198 weilong <guoweilong@gmail.com> 1369355040 -0700 update by push +39508bee8506ce06feead7599a53ffd09cc89198 99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed weilong <guoweilong@gmail.com> 1369785465 -0700 update by push +99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed 823880bb2ebba5ae6bd1c7de7dfc4ce03935155b weilong <guoweilong@gmail.com> 1369876368 -0700 update by push +823880bb2ebba5ae6bd1c7de7dfc4ce03935155b 7664a927ba6f12b023f6cd72dc407e616c5b85e5 weilong <guoweilong@gmail.com> 1370044698 -0700 update by push +7664a927ba6f12b023f6cd72dc407e616c5b85e5 3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 weilong <guoweilong@gmail.com> 1371202931 -0700 update by push +3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 58de749ac6ebe90f32db2c89cdd72507669695e2 weilong <guoweilong@gmail.com> 1371366804 -0700 update by push +58de749ac6ebe90f32db2c89cdd72507669695e2 5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 weilong <guoweilong@gmail.com> 1375139295 -0700 update by push +5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 0c877f0900535fbee6909dc1591e8ad0eac29c52 weilong <guoweilong@gmail.com> 1375307762 -0700 update by push +0c877f0900535fbee6909dc1591e8ad0eac29c52 85a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8 weilong <guoweilong@gmail.com> 1383633798 +0800 update by push
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/04/bd24805dff9825629951311783935221cb83bf Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,4 @@ +x+)JMU054c040031Qp ñð +f˜wÿ}°ún±yê·¬‘ýüPì¡]0P‰¹©©‚[fNIjQPjbJ±^A%Ã>Yï™3‚Œ±ž›v<ÿõr±?,lmPÃ|<]ý‚]Jmzšðä‹Ð +©Ûj›ÿ³‰C•¹:ºøºêå¦0Ô¥¼‰w8Ç}aq¼€×'sM#s‘mpE>®ŽÁ®ñ~þ!®Á÷Ÿ$.ž]攸tO®_£ƒÛ·ò/&@ TŸ˜“™žÇàõÍÿþôÊI2N‘Ï ç¤´œ>}b)\If^Jjƒ[ûû£7&8¶‹œ^œl!se³àdýWP)NMÍN-2Òò¥lVl•}RƒfNIw¸ÛzŸU*Zå˜Ê“J3sR@ÊÃ_nñaà-{Àëd¥¶fúòSvv`*ONÌɉÏM-ɨÌI,ÉÌÏé|³óùåÉÜ1Ñ3mJîóÜcm8ñîôÒ’Ìœb•¿ÿŸËh +þ¹‘`ݺgÇ+ËNC•¤'æ$VT2(Nùi[Ó4«r}øjm£[ÿîCÌ(I-.‰OI,Id˜ÕÂð¼ý·|XÞÜ«ýYÁŒë¸ÉÌÝ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/0c/877f0900535fbee6909dc1591e8ad0eac29c52 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +x•Ž[ +Â0EýÎ*fJ’É£·2™fb¡1RRtùöà øqápàÀåÞÚ2Àšé4¶R ³gažØ%fÑȳ%‰sáXQ/ÚÊs€¡ÀZPp69NÅN)1šSÀ@"Ù‹EÔŠöñè¼Ë²ög…kÝûïµÑ²^¸·ŒuŒÞÁYGÕa£ü_*Y>@÷ª¾z†Eí \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/13/73f3c499ff4b6fb8ddff39e8859fa5326d0098 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +x•ËmÃ0DsfÛ€ þEÁA`§¥‚år¥Å€¦öÃCÈm0ƒÌ£ZJî }xéf¥&s$Òz¶”¤UFaœVéוØk9ñλÕÙiF;æ£ ŠC”†â0Ø(Qà£Ö?œ÷zlðº=ê_¾nó~¦ZÞ@ﵚ¤™á$')ÅhÇ¿Îÿ'Å-%xr»çz@oH_q#âvÜóv”¡p?ó,Ëû¯~¿ÞmWx \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/20/3e3ea9ba50c325d53e45f6d31686b6b23bba41 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +x•ŽË Ã0DsVjÀA«ÕÏBH+kåäÈØ2i?>¤œfxÙڲ”.5Â¥oÌ2¸œÆ¬Ù§D:±E"E^<’=—hâäÅJ¿»Ì™½F‚Œ€#;ªA™ÑE—„H390‚Žþj›üp©í=ËÛ|´_Ì•zÚr—€.ëF9(¯”8éù¯óÿ¦xµ—µò°—λ<ÖDgŠ/eK \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/33/68604160305d798b0aa8a101857b39a16aab92 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +xŽA E]sŠ¹@ 1Æ3x¦•¤ÓR½¾ìÜ»ú??yùj)¹ºÈSÛ˜ÁŒ¹˜‚‹¬(¢2Ò{©Ðg}@ +^'gˆ¤x…×iÇ&¡&D$VÎûHcAáhÒź¨'Ÿ‰p´gÝ`>ê‡óR×®¿~ŸKÈË™j¹Á¨Q;…è-ÒJ)úÚÿ‹¿óΰSÝr?º¯ihuè¥&^ÄRR \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/39/508bee8506ce06feead7599a53ffd09cc89198 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,4 @@ +x•ŽQ +à Dûí)ö-êªÉB)½JÔ5b‰¡=~ýèú7ó`xj)¹Öti;3mIb«| +h=!1E“µRû^'#‹mÚym€Ö?z¯Ø:碒2 Îêz"ɆÄt¶WÝáÍy©ë÷ù¬¿üœË”—[¨å +a!ÂURŠNû¿Æÿ/Åqn[ÝÄœä5ò‡ñ½ßIH \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/3c/14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +x•ŽAjÃ0E»Ö)æ #ÙÒØPBÈÜÌH#Õ`GÅ‘IŽ/zì>ïǺ®s×áWÛTA†˜ý„¼rgìU¨ëù 6KˆcÊÖüñ¦÷BÏ£#át]1‘K±GÒ`Cô2xõ†÷ö[7xê¼Ô{ï²×ÿ}-+ÏË9Öõ¶#ëÐΠÑôø×ôsÓäù²—ä£;M·h\ÌцI¶ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/41/2eeda78dc9de1186c2e0e1526764af82ab3431 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +xu±N1†Y¹§°ÔCXn@:!Qµbõ“”‹£ÄA…§ÇW˜ŠñéÓûÿ‡ÈÜ?ÜÝ,à± 9q£€¸³€ÑUÀD]ñ\&xÚBÒcøB œºåÅìQ¿vÝÖ +O .¼„Ú0ÂAî–«±Â<¼ïÇú†%+¬1Ípråäú–.Õ̅߯àÇ`@o™ô×ÔdÑX´ãT?ë)ˆÞ'o"a+çˆAK6Ïk“«þǵ8±üV¾Þ4y&oòÝfkæ)âM¾?Zþ7ý}§ú \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/41/ae40d93e4224bc6438f6ef14c15a2846138065 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +xŽM +Â0„]ç¹€òòŸ€ˆn<€7xI_j mJLoowîÝ Ã7“ê<—Î¥†CoDÜç(¤!‹!¢1Jëƒ% ò&Û,2Y¶b£¥sK:¡ÃŽgÊk…’¤ÄhApŠáÖŸµñq«o*S]F~þåë8c™N©Î.”ÕÞ§?‚`{»_ìôטÝR¢ vâ÷2í–áð:ö'M4 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/56/5f548bc9a4ffcad63323481e8b03cb78a84b0a Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +x•Ž[ +Â0EýÎ*fÊdÒLq~ç1©…Ö”’"îÞŠnÀ¿Ãý©ÎóØ€ˆm‘è5£ÉKqž;2—Hd£6‰É]¯£ZÂ*œÉÒÇ£Á@^Jñ)yÓ×%ŒBNRoMA¶v¯+<eœêc€ó°Õ_‡9ŒÓ)ÕùÚ°í¼ÕÌpD‡¨v»ÿkò©¶%‡&_pûŽª7<óH… \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/58/de749ac6ebe90f32db2c89cdd72507669695e2 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +x•ŽQjÃ0DûSìZ´^YkC!GHO°–wUClG¦9~ôÑæcx¼Ie]— +ùº«Eê™R†ÑF¤8©0÷¾›%hHb„³{È®[JK´4ÚìQÍM-±'í¼Í2:9êOÙáW—{Ù2œòQþú%¯²Ü¿RYÏ€ÄÍ™|zöÞµµý«úÒÙò‚éÈO°æ½Ý®ßP%»7ŸGIÌ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/5f/fa6c0f3f3d1b78e2899cc1ad9636affb5f2330 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +x•Ž[ +Â0EýÎ*²Ëdòq+yLj¡i¤¤º}ûáü»8œ›{k˨ìeìÌL*hØR+´‰¬ÒJù I[D•SЩŠWÜyÒ†ÂÞPÌŽT%a”KñhÁ;GŽ,£ˆÇxö]~xYû6ËÛ|ôß~Ì-.ë”{»K¥ýY#^^Áˆ“žÿÿoŠ7N0ñtÖB \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/68/aefe44af07a6434b8d1289b3b51ec01e2d1d18 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +x+)JMU07e040031QˆÏÌË,‰×+¨d(á[‘jtcòBõ°û»O0YUVZ’™SRd¾!ÿ—•}¹>GMåÓv·‡Â/‰<ý \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/6d/252c627030a29eff9cc938f74c0be27ec853f0 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +xŽM +Â0F]ç¹€2™´3ñ"¿5Ð))^ßîÜ»ûÞƒ_ìÕ¡íil9kǘhdž°¤Øzãر% Œ1‚Qo¿å×Ð.bPfo`¢è©HIQØ à§AÈå÷ñì›^öþÉuí¯E_û¾4_×Kìí¦¥y¶ˆú {\ù¯X=zª¥æ¤¾í@Fà \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/71/5f8471c2dc3e60e99dfc042aed1b46bb9cc095 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +x+)JMU04´d040031QˆÏÌË,‰×+¨d(á[‘jtcòBõ°û»O0YUVT”TŸTš™“R¨¸çÇiËŸuU%ÿ=8¹Î tñ¨Âòt„²ŒÝ[\4c.î^v¹÷ȉ÷¡¥Èc4 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/74/11c2b2e56b60e4f412a81af1b7607d83ff0617 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,5 @@ +x]RÝoÚ0ß³ÿŠO”v£{ÚÞLbŠµ ÇŒñ˜C¼…ÅN«þ÷»Ð©“‘Ïw¿¯sÕ¹ +æóù·Oº5°–R[›Þ¸ÃÃ=c±;¿ öظ«ïáéËüéÿ¾Â¢x(Œùc†'HÌ‹éÜÙþ€w€¢_ãÍðbšGÆ6f8YïëÁzhÍ`ª78eLÁa0Ü궎&‚à ì߀0qÀU¡´½íPBzv†a¼;„×r0ØÜ@齫m‰xиz<™>”ø¶3îœ×‰ÙýDÒ˜²c¶G4·+xµ¡uc€Áø0Øš0"°}Ý i¸]wöd¯4>…ä*=: œ\cô5“óXuÖ·4– «1`§§â”yD>>»¼é:†uO^ÿ©›z(Ìù¯yª¼¶îôщõì0=Rb(ØÑ8Œlbümê@R~p]ç^ÉZíúÆ’_ÿ1Weå^ÌäåòzPê÷´€IÄe«×+ß–ø*s ymϨt³ƒîÆÊ\¼-;8»aâûß&¾½PäK½ãJ€,`£òŸ2 ÌxçY;©WùVv(žé=äKàÙ~È,‰@üÚ(Q+&×›T +¬É,N·‰ÌžasYŽÏ]â;GP^¡¤À¹%¬…ŠWˆÌ2•z±¥Ôa.s6\ioS®`³U›¼HŸ l&³¥B±™~DV¬ø‰(V<M‰Šñ-ªW¤â|³Wòy¥a•§‰ÀâB 2¾HÅ… +MÅ)—ë¾æϤNAŽ(ŠQÛEìV‚JÄÇñk™gd#Î3ð¡K¥ßGw²p% +d©òuÄ(Nœ@tÁ¹L\P(ê) ÷`¶EÓ7-ž"®'û°>Üæ_¸Ÿew \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/75/2b598979416c079bebe57d9dee4fc00d816a5d Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,3 @@ +xmSÑnÓ0å9_q•—µ‚FI»Câ!k»ihí`MB•S;©…cÛi—}=÷&Að@*G¾>÷ÜsÎÍ•É!‰“øUpšFq4ƒ ¬YÉÕ˜ÆÉ,H"¸‘Ï7%H —ÊEuL#H9‡Ýçeš J=À%|jTÓv5g^Àã*]®WQÅa´…ÔÒK£Áf‹ÛŠÕP[1?&tjêÂÑ"Óøºñp> + Ú@Í,«„6˜õ¥ÂZcÁŠÚØ¡ªqR—p¶ÿkæîH÷V0î€)YjD¸Œ`}—Á½<ípŒ¯¤ÃÇؘ̒ÿêænOøJì…æ(HØÁ‡º©raݾ¶ç©ß+ç¿Ç?àõGPBvZþjÄþæi¿˜fã]³Ÿ¢›”‹‚5ÊÿáÞÀµ9{)hÆAAR¤W±×¹‡›Ì90¥&¨Ê±U¬Sö,•£ÑŒƒÑÞO²DÎýÛy‹#Óeß}ͨ™´œ +hÒ‰P_ç™æÌòÁ%òm›®;,QÁ—Mº^ÁÍ}z‹Bq7ÞFá-³^ò ‘d®ÕøÂÉÁ!gN ¾k‚}!?>^o¡°¬¬„öxkºV™Öwè>:rÍYYpÁ»>•DÆÏô~`ßMgrè Â0̸͖»,xÁ¶©»,…Èhâ öÖ#¨¤9µºeŠ=·pB˜çàêïc©)´´T›La¶ú–&qa*‰s8Ð(*_ƃªA“¥õRæÜ¿¹Ûdƒ¬)ž‚!n«çp1©`~AˆtŠ£x~$¸´´É~«hÕDïs@¿ß Jƒ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/bf/0578826f7cc94414b5132218481f4b3b97821a Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +xTßkÛ0ÞküWYm"«qVðÃ++t,…=´!(µÒˆÚ–ä•ô¯ßI²Ûx[ØÃ&HìÓÝ}÷ݧ“×µZCqVœ¾‘VÆuF¶ÖÏ'6²²Õ£`·¢þ!(8Ùà¿íÖÚ¨{a-ÃÛJ5Éƨæ5z¸KD¸òÑÿš7,”mÒGã»áN™¬´ÓÜX1„~ÓNªvá·L<JØßM³¤w0+ܪÞÕΦ¾–7¢$ß?Íç'×k¸ÖØ'Ɇx^U+,ˆR’o’ç> ßÜNcfÍJXW’’PF—$Èä7[¸¼ºþŒ‘pü7% önš»mäÆŽÐÑ‘Ž<Dg@Ñ>ã…Çâãͬ‘¤±S<<nl†*F9Yx¬ü&J ¿¤ÙùWoÁ%PI·-‡ÝP’¼Íÿq%ªs¾HIŒ¨º{Qåd¶G$¸±®hS_tÖGSò„n”Z¶8Cíëp²pX1z(»H&u飙յt(Áă±'#HÉÔÞ¹ø#ÓÇ!oO—oÏ—4šy±Ì²¹k}edê°ûZY`k0þƒ,FV¥¯/ óÙº´˜‡E?Ä•yêJ˜¢$7_«‘v3‚6â$I¸‡éQ«¶†æ±’¦Ž¦Q¥!£þ3P—7¦8F?x‰Šr”¬wn«ZèLï _Ãyà¹vŒ¸ù#ø•z±oçg(£§Ìó‹âý2#¾¼ÇÕjjáAÃd¿œ4ìâŸ)²'tܧi ÿ{Ÿ£î0‰½õÃ6f”ðð,õ(å ÙÉdògZÅ£ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/de/51e461a39b764261a5bc6d4e7a014046f677f4 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +xMÏÍJÄ0`×yŠÃ¬4LÓÁÁe±U+"HÍmH“Ǿ½-ãÂÍÙÜÃw9½q=ÄýñŠ}¾ç%îÐÊÅÃ-ľ(YÁñ¤ÐçÚ"'m"÷•R8}ÔU×`ІØ8¬Àk6Ä?áä•L„ϦªÛ†Ï +×5 Ú꤅Puϳô\˜eºÙôíiš»wgi—“Ï ç‰,¬ƒ—AΔ(°òR¥\@ ïÂ_+GmGœƒ[ÓË4m8â´ÝI!í*8Ú—oú‹l\g0ö°tSõ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/packed-refs Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +# pack-refs with: peeled +6d252c627030a29eff9cc938f74c0be27ec853f0 refs/remotes/origin/master
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/refs/heads/master Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +85a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/refs/remotes/origin/HEAD Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +ref: refs/remotes/origin/master
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/refs/remotes/origin/master Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +85a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/.name Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +BSseeker2 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/BSseeker2.iml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="UTF-8"?> +<module type="PYTHON_MODULE" version="4"> + <component name="NewModuleRootManager"> + <content url="file://$MODULE_DIR$" /> + <orderEntry type="inheritedJdk" /> + <orderEntry type="sourceFolder" forTests="false" /> + </component> +</module> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/encodings.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,5 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" /> +</project> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/misc.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,5 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.2 (/System/Library/Frameworks/Python.framework/Versions/2.7/bin/python)" project-jdk-type="Python SDK" /> +</project> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/modules.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectModuleManager"> + <modules> + <module fileurl="file://$PROJECT_DIR$/.idea/BSseeker2.iml" filepath="$PROJECT_DIR$/.idea/BSseeker2.iml" /> + </modules> + </component> +</project> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/other.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="PyDocumentationSettings"> + <option name="myDocStringFormat" value="Plain" /> + </component> +</project> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/scopes/scope_settings.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,5 @@ +<component name="DependencyValidationManager"> + <state> + <option name="SKIP_IMPORT_STATEMENTS" value="false" /> + </state> +</component> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/testrunner.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="TestRunnerService"> + <option name="projectConfiguration" value="Unittests" /> + <option name="PROJECT_TEST_RUNNER" value="Unittests" /> + </component> +</project> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/vcs.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="VcsDirectoryMappings"> + <mapping directory="$PROJECT_DIR$" vcs="Git" /> + </component> +</project> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/workspace.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,644 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ChangeListManager"> + <list default="true" id="172bd281-b8c2-4544-b510-26029a6498b7" name="Default" comment="" /> + <ignored path="BSseeker2.iws" /> + <ignored path=".idea/workspace.xml" /> + <file path="/Dummy.txt" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383633767371" ignored="false" /> + <file path="/a.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1365491538292" ignored="false" /> + <file path="/bs_rrbs.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383621399873" ignored="false" /> + <file path="/bs_pair_end.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383622148043" ignored="false" /> + <file path="/bs_single_end.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383571511133" ignored="false" /> + <file path="/bs_align_utils.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1381998901080" ignored="false" /> + <file path="/rrbs_build.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1367304455503" ignored="false" /> + <file path="/utils.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383633738005" ignored="false" /> + <file path="/README.md" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383632256060" ignored="false" /> + <file path="/bs_seeker2-align.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383632641037" ignored="false" /> + <file path="/bs_seeker2-build.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383632528537" ignored="false" /> + <file path="/bs_seeker2-call_methylation.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1377909319118" ignored="false" /> + <file path="/rrbs_build_new.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1366402518261" ignored="false" /> + <file path="/bs_rrbs_new.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1366448702885" ignored="false" /> + <file path="$APPLICATION_HOME_DIR$/lib/pycharm.jar!/resources/relaxng.rng" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1366311004094" ignored="false" /> + <file path="/wg_build.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383623123179" ignored="false" /> + <file path="/bs_seeker2_wrapper.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1368857835824" ignored="false" /> + <file path="/bs_seeker2_wrapper.xml" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1382083103094" ignored="false" /> + <file path="/UPDATE" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1369021897154" ignored="false" /> + <file path="/RELEASE_NOTES" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383630679572" ignored="false" /> + <file path="/output.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1382096328368" ignored="false" /> + <file path="/bs_pair_end2.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1382863836790" ignored="false" /> + <file path="/bs_pair_end_2.py" changelist="172bd281-b8c2-4544-b510-26029a6498b7" time="1383060615712" ignored="false" /> + <option name="TRACKING_ENABLED" value="true" /> + <option name="SHOW_DIALOG" value="false" /> + <option name="HIGHLIGHT_CONFLICTS" value="true" /> + <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> + <option name="LAST_RESOLUTION" value="IGNORE" /> + </component> + <component name="ChangesViewManager" flattened_view="true" show_ignored="false" /> + <component name="CreatePatchCommitExecutor"> + <option name="PATCH_PATH" value="" /> + </component> + <component name="DaemonCodeAnalyzer"> + <disable_hints /> + </component> + <component name="ExecutionTargetManager" SELECTED_TARGET="default_target" /> + <component name="FavoritesManager"> + <favorites_list name="BSseeker2"> + <favorite_root url="file://$PROJECT_DIR$/gpl-3.0.txt" type="psiFile" klass="com.intellij.ide.projectView.impl.nodes.PsiFileNode" /> + </favorites_list> + </component> + <component name="FileEditorManager"> + <leaf> + <file leaf-file-name="README.md" pinned="false" current="false" current-in-tab="false"> + <entry file="file://$PROJECT_DIR$/README.md"> + <provider selected="true" editor-type-id="text-editor"> + <state line="77" column="25" selection-start="2888" selection-end="2895" vertical-scroll-proportion="-7.92"> + <folding /> + </state> + </provider> + </entry> + </file> + <file leaf-file-name="utils.py" pinned="false" current="true" current-in-tab="true"> + <entry file="file://$PROJECT_DIR$/bs_utils/utils.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="14" column="0" selection-start="176" selection-end="176" vertical-scroll-proportion="0.16103059"> + <folding /> + </state> + </provider> + </entry> + </file> + </leaf> + </component> + <component name="FindManager"> + <FindUsagesManager> + <setting name="OPEN_NEW_TAB" value="false" /> + </FindUsagesManager> + </component> + <component name="Git.Settings"> + <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" /> + </component> + <component name="GitLogSettings"> + <option name="myDateState"> + <MyDateState /> + </option> + </component> + <component name="IdeDocumentHistory"> + <option name="changedFiles"> + <list> + <option value="$PROJECT_DIR$/bs_align/bs_align_utils.py" /> + <option value="$PROJECT_DIR$/galaxy/bs_seeker2_wrapper.py" /> + <option value="$PROJECT_DIR$/galaxy/bs_seeker2_wrapper.xml" /> + <option value="$PROJECT_DIR$/bs_align/output.py" /> + <option value="$PROJECT_DIR$/bs_align/bs_pair_end2.py" /> + <option value="$PROJECT_DIR$/bs_align/bs_pair_end_2.py" /> + <option value="$PROJECT_DIR$/bs_align/bs_rrbs.py" /> + <option value="$PROJECT_DIR$/bs_align/bs_single_end.py" /> + <option value="$PROJECT_DIR$/bs_align/bs_pair_end.py" /> + <option value="$PROJECT_DIR$/bs_index/wg_build.py" /> + <option value="$PROJECT_DIR$/bs_seeker2-call_methylation.py" /> + <option value="$PROJECT_DIR$/RELEASE_NOTES" /> + <option value="$PROJECT_DIR$/bs_seeker2-build.py" /> + <option value="$PROJECT_DIR$/bs_seeker2-align.py" /> + <option value="$PROJECT_DIR$/README.md" /> + <option value="$PROJECT_DIR$/bs_utils/utils.py" /> + </list> + </option> + </component> + <component name="ProjectFrameBounds"> + <option name="x" value="53" /> + <option name="y" value="22" /> + <option name="width" value="1226" /> + <option name="height" value="778" /> + </component> + <component name="ProjectLevelVcsManager" settingsEditedManually="false"> + <OptionsSetting value="true" id="Add" /> + <OptionsSetting value="true" id="Remove" /> + <OptionsSetting value="true" id="Checkout" /> + <OptionsSetting value="true" id="Update" /> + <OptionsSetting value="true" id="Status" /> + <OptionsSetting value="true" id="Edit" /> + <ConfirmationsSetting value="0" id="Add" /> + <ConfirmationsSetting value="0" id="Remove" /> + </component> + <component name="ProjectReloadState"> + <option name="STATE" value="0" /> + </component> + <component name="ProjectView"> + <navigator currentView="ProjectPane" proportions="" version="1" splitterProportion="0.5"> + <flattenPackages /> + <showMembers /> + <showModules /> + <showLibraryContents /> + <hideEmptyPackages /> + <abbreviatePackageNames /> + <autoscrollToSource /> + <autoscrollFromSource /> + <sortByType /> + </navigator> + <panes> + <pane id="Scope" /> + <pane id="ProjectPane"> + <subPane> + <PATH> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> + </PATH_ELEMENT> + </PATH> + <PATH> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + </PATH> + <PATH> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="galaxy" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + </PATH> + <PATH> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="bs_utils" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + </PATH> + <PATH> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="bs_index" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + </PATH> + <PATH> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="BSseeker2" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + <PATH_ELEMENT> + <option name="myItemId" value="bs_align" /> + <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> + </PATH_ELEMENT> + </PATH> + </subPane> + </pane> + </panes> + </component> + <component name="PropertiesComponent"> + <property name="options.splitter.main.proportions" value="0.3" /> + <property name="WebServerToolWindowFactoryState" value="false" /> + <property name="recentsLimit" value="5" /> + <property name="options.lastSelected" value="reference.settingsdialog.IDE.editor.colors.Font" /> + <property name="FullScreen" value="false" /> + <property name="options.searchVisible" value="true" /> + <property name="options.splitter.details.proportions" value="0.2" /> + </component> + <component name="PyConsoleOptionsProvider"> + <option name="myPythonConsoleState"> + <PyConsoleSettings /> + </option> + <option name="myDjangoConsoleState"> + <PyConsoleSettings /> + </option> + </component> + <component name="RecentsManager"> + <key name="CopyFile.RECENT_KEYS"> + <recent name="$PROJECT_DIR$/bs_align" /> + <recent name="$PROJECT_DIR$/bs_index" /> + </key> + </component> + <component name="RunManager" selected="Python.output"> + <configuration default="false" name="output" type="PythonConfigurationType" factoryName="Python" temporary="true"> + <option name="INTERPRETER_OPTIONS" value="" /> + <option name="PARENT_ENVS" value="true" /> + <envs> + <env name="PYTHONUNBUFFERED" value="1" /> + </envs> + <option name="SDK_HOME" value="" /> + <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/bs_align" /> + <option name="IS_MODULE_SDK" value="true" /> + <option name="ADD_CONTENT_ROOTS" value="true" /> + <option name="ADD_SOURCE_ROOTS" value="true" /> + <module name="BSseeker2" /> + <EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" /> + <option name="SCRIPT_NAME" value="$PROJECT_DIR$/bs_align/output.py" /> + <option name="PARAMETERS" value="" /> + <RunnerSettings RunnerId="PythonCover" /> + <ConfigurationWrapper RunnerId="PythonCover" /> + <method /> + </configuration> + <configuration default="true" type="PythonConfigurationType" factoryName="Python"> + <option name="INTERPRETER_OPTIONS" value="" /> + <option name="PARENT_ENVS" value="true" /> + <envs> + <env name="PYTHONUNBUFFERED" value="1" /> + </envs> + <option name="SDK_HOME" value="" /> + <option name="WORKING_DIRECTORY" value="" /> + <option name="IS_MODULE_SDK" value="false" /> + <option name="ADD_CONTENT_ROOTS" value="true" /> + <option name="ADD_SOURCE_ROOTS" value="true" /> + <module name="BSseeker2" /> + <EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" /> + <option name="SCRIPT_NAME" value="" /> + <option name="PARAMETERS" value="" /> + <method /> + </configuration> + <configuration default="true" type="tests" factoryName="Unittests"> + <option name="INTERPRETER_OPTIONS" value="" /> + <option name="PARENT_ENVS" value="true" /> + <envs /> + <option name="SDK_HOME" value="" /> + <option name="WORKING_DIRECTORY" value="" /> + <option name="IS_MODULE_SDK" value="false" /> + <option name="ADD_CONTENT_ROOTS" value="true" /> + <option name="ADD_SOURCE_ROOTS" value="true" /> + <module name="BSseeker2" /> + <EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" /> + <option name="SCRIPT_NAME" value="" /> + <option name="CLASS_NAME" value="" /> + <option name="METHOD_NAME" value="" /> + <option name="FOLDER_NAME" value="" /> + <option name="TEST_TYPE" value="TEST_SCRIPT" /> + <option name="PATTERN" value="" /> + <option name="USE_PATTERN" value="false" /> + <option name="PUREUNITTEST" value="true" /> + <option name="PARAMS" value="" /> + <option name="USE_PARAM" value="false" /> + <method /> + </configuration> + <configuration default="true" type="tests" factoryName="Doctests"> + <option name="INTERPRETER_OPTIONS" value="" /> + <option name="PARENT_ENVS" value="true" /> + <envs /> + <option name="SDK_HOME" value="" /> + <option name="WORKING_DIRECTORY" value="" /> + <option name="IS_MODULE_SDK" value="false" /> + <option name="ADD_CONTENT_ROOTS" value="true" /> + <option name="ADD_SOURCE_ROOTS" value="true" /> + <module name="BSseeker2" /> + <EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" /> + <option name="SCRIPT_NAME" value="" /> + <option name="CLASS_NAME" value="" /> + <option name="METHOD_NAME" value="" /> + <option name="FOLDER_NAME" value="" /> + <option name="TEST_TYPE" value="TEST_SCRIPT" /> + <option name="PATTERN" value="" /> + <option name="USE_PATTERN" value="false" /> + <method /> + </configuration> + <list size="1"> + <item index="0" class="java.lang.String" itemvalue="Python.output" /> + </list> + <recent_temporary> + <list size="1"> + <item index="0" class="java.lang.String" itemvalue="Python.output" /> + </list> + </recent_temporary> + </component> + <component name="ShelveChangesManager" show_recycled="false" /> + <component name="SvnConfiguration" maxAnnotateRevisions="500" myUseAcceleration="nothing" myAutoUpdateAfterCommit="false" cleanupOnStartRun="false" SSL_PROTOCOLS="sslv3"> + <option name="USER" value="" /> + <option name="PASSWORD" value="" /> + <option name="mySSHConnectionTimeout" value="30000" /> + <option name="mySSHReadTimeout" value="30000" /> + <option name="LAST_MERGED_REVISION" /> + <option name="MERGE_DRY_RUN" value="false" /> + <option name="MERGE_DIFF_USE_ANCESTRY" value="true" /> + <option name="UPDATE_LOCK_ON_DEMAND" value="false" /> + <option name="IGNORE_SPACES_IN_MERGE" value="false" /> + <option name="CHECK_NESTED_FOR_QUICK_MERGE" value="false" /> + <option name="IGNORE_SPACES_IN_ANNOTATE" value="true" /> + <option name="SHOW_MERGE_SOURCES_IN_ANNOTATE" value="true" /> + <option name="FORCE_UPDATE" value="false" /> + <option name="IGNORE_EXTERNALS" value="false" /> + <myIsUseDefaultProxy>false</myIsUseDefaultProxy> + </component> + <component name="TaskManager"> + <task active="true" id="Default" summary="Default task"> + <changelist id="172bd281-b8c2-4544-b510-26029a6498b7" name="Default" comment="" /> + <created>1365490852338</created> + <updated>1365490852338</updated> + </task> + <task id="LOCAL-00001" summary="update by Weilong"> + <created>1365495166132</created> + <updated>1365495166132</updated> + </task> + <task id="LOCAL-00002" summary="Add version track; Accelerate alignment; Revised RRBS part."> + <created>1366217039909</created> + <updated>1366217039909</updated> + </task> + <task id="LOCAL-00003" summary="RRBS un-directional RRBS different digest enzyme"> + <created>1366521472272</created> + <updated>1366521472272</updated> + </task> + <task id="LOCAL-00004" summary="Multiple-sites updates"> + <created>1368857624039</created> + <updated>1368857624039</updated> + </task> + <task id="LOCAL-00005" summary="Clean-up/Galaxy/Test_data"> + <created>1368872343967</created> + <updated>1368872343967</updated> + </task> + <task id="LOCAL-00006" summary="Clean-up/galaxy/test_data"> + <created>1368872394831</created> + <updated>1368872394831</updated> + </task> + <task id="LOCAL-00007" summary="Fix bug on utils.py; Increased version; Add UPDATE"> + <created>1369030443105</created> + <updated>1369030443105</updated> + </task> + <task id="LOCAL-00008" summary="support diff indexes"> + <created>1369355034039</created> + <updated>1369355034040</updated> + </task> + <task id="LOCAL-00009" summary="Update Readme"> + <created>1369785459146</created> + <updated>1369785459146</updated> + </task> + <task id="LOCAL-00010" summary="fix bug in galaxy; chmod +x *.py"> + <created>1369876356622</created> + <updated>1369876356622</updated> + </task> + <task id="LOCAL-00011" summary="fix galaxy xml"> + <created>1370044685987</created> + <updated>1370044685987</updated> + </task> + <task id="LOCAL-00012" summary="fix bugs for RRBS tag"> + <created>1371202921858</created> + <updated>1371202921858</updated> + </task> + <task id="LOCAL-00013" summary="fix bugs for RRBS tag"> + <created>1371366774531</created> + <updated>1371366774531</updated> + </task> + <task id="LOCAL-00014" summary="fix a bug"> + <created>1375307754641</created> + <updated>1375307754641</updated> + </task> + <task id="LOCAL-00015" summary="V2.0.5"> + <created>1383633777408</created> + <updated>1383633777408</updated> + </task> + <option name="localTasksCounter" value="16" /> + <servers /> + </component> + <component name="TodoView" selected-index="0"> + <todo-panel id="selected-file"> + <are-packages-shown value="false" /> + <are-modules-shown value="false" /> + <flatten-packages value="false" /> + <is-autoscroll-to-source value="false" /> + </todo-panel> + <todo-panel id="all"> + <are-packages-shown value="false" /> + <are-modules-shown value="false" /> + <flatten-packages value="false" /> + <is-autoscroll-to-source value="false" /> + </todo-panel> + <todo-panel id="default-changelist"> + <are-packages-shown value="false" /> + <are-modules-shown value="false" /> + <flatten-packages value="false" /> + <is-autoscroll-to-source value="false" /> + </todo-panel> + </component> + <component name="ToolWindowManager"> + <frame x="53" y="22" width="1226" height="778" extended-state="0" /> + <editor active="true" /> + <layout> + <window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> + <window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32970452" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> + <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32980332" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" /> + <window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> + <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.24978974" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> + <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.25211507" sideWeight="0.6718507" order="0" side_tool="false" content_ui="combo" /> + <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> + <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32968882" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> + <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32980332" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> + <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> + <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> + <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" /> + <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" /> + <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> + <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32829046" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> + <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="SLIDING" type="SLIDING" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" /> + <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" /> + <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" /> + </layout> + </component> + <component name="VcsContentAnnotationSettings"> + <option name="myLimit" value="2678400000" /> + </component> + <component name="VcsManagerConfiguration"> + <option name="OFFER_MOVE_TO_ANOTHER_CHANGELIST_ON_PARTIAL_COMMIT" value="true" /> + <option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="false" /> + <option name="CHECK_NEW_TODO" value="true" /> + <option name="myTodoPanelSettings"> + <value> + <are-packages-shown value="false" /> + <are-modules-shown value="false" /> + <flatten-packages value="false" /> + <is-autoscroll-to-source value="false" /> + </value> + </option> + <option name="PERFORM_UPDATE_IN_BACKGROUND" value="true" /> + <option name="PERFORM_COMMIT_IN_BACKGROUND" value="true" /> + <option name="PERFORM_EDIT_IN_BACKGROUND" value="true" /> + <option name="PERFORM_CHECKOUT_IN_BACKGROUND" value="true" /> + <option name="PERFORM_ADD_REMOVE_IN_BACKGROUND" value="true" /> + <option name="PERFORM_ROLLBACK_IN_BACKGROUND" value="false" /> + <option name="CHECK_LOCALLY_CHANGED_CONFLICTS_IN_BACKGROUND" value="false" /> + <option name="CHANGED_ON_SERVER_INTERVAL" value="60" /> + <option name="SHOW_ONLY_CHANGED_IN_SELECTION_DIFF" value="true" /> + <option name="CHECK_COMMIT_MESSAGE_SPELLING" value="true" /> + <option name="DEFAULT_PATCH_EXTENSION" value="patch" /> + <option name="SHORT_DIFF_HORIZONTALLY" value="true" /> + <option name="SHORT_DIFF_EXTRA_LINES" value="2" /> + <option name="SOFT_WRAPS_IN_SHORT_DIFF" value="true" /> + <option name="INCLUDE_TEXT_INTO_PATCH" value="false" /> + <option name="INCLUDE_TEXT_INTO_SHELF" value="false" /> + <option name="SHOW_FILE_HISTORY_DETAILS" value="true" /> + <option name="SHOW_VCS_ERROR_NOTIFICATIONS" value="true" /> + <option name="SHOW_DIRTY_RECURSIVELY" value="false" /> + <option name="LIMIT_HISTORY" value="true" /> + <option name="MAXIMUM_HISTORY_ROWS" value="1000" /> + <option name="UPDATE_FILTER_SCOPE_NAME" /> + <option name="USE_COMMIT_MESSAGE_MARGIN" value="false" /> + <option name="COMMIT_MESSAGE_MARGIN_SIZE" value="72" /> + <option name="WRAP_WHEN_TYPING_REACHES_RIGHT_MARGIN" value="false" /> + <option name="FORCE_NON_EMPTY_COMMENT" value="false" /> + <option name="CLEAR_INITIAL_COMMIT_MESSAGE" value="false" /> + <option name="LAST_COMMIT_MESSAGE" value="V2.0.5" /> + <option name="MAKE_NEW_CHANGELIST_ACTIVE" value="false" /> + <option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false" /> + <option name="CHECK_FILES_UP_TO_DATE_BEFORE_COMMIT" value="false" /> + <option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false" /> + <option name="REFORMAT_BEFORE_FILE_COMMIT" value="false" /> + <option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8" /> + <option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5" /> + <option name="ACTIVE_VCS_NAME" /> + <option name="UPDATE_GROUP_BY_PACKAGES" value="false" /> + <option name="UPDATE_GROUP_BY_CHANGELIST" value="false" /> + <option name="UPDATE_FILTER_BY_SCOPE" value="false" /> + <option name="SHOW_FILE_HISTORY_AS_TREE" value="false" /> + <option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6" /> + <MESSAGE value="update by Weilong" /> + <MESSAGE value="Add version track; Accelerate alignment; Revised RRBS part." /> + <MESSAGE value="RRBS un-directional RRBS different digest enzyme" /> + <MESSAGE value="Multiple-sites updates" /> + <MESSAGE value="Clean-up/Galaxy/Test_data" /> + <MESSAGE value="Clean-up/galaxy/test_data" /> + <MESSAGE value="Fix bug on utils.py; Increased version; Add UPDATE" /> + <MESSAGE value="support diff indexes" /> + <MESSAGE value="Update Readme" /> + <MESSAGE value="fix bug in galaxy; chmod +x *.py" /> + <MESSAGE value="Bug fixed in Galaxy.xml" /> + <MESSAGE value="fix galaxy xml" /> + <MESSAGE value="fix bugs for RRBS tag" /> + <MESSAGE value="fix a bug" /> + <MESSAGE value="V2.0.5" /> + </component> + <component name="XDebuggerManager"> + <breakpoint-manager> + <option name="time" value="23" /> + </breakpoint-manager> + </component> + <component name="editorHistoryManager"> + <entry file="file://$PROJECT_DIR$/FilterReads.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="234" column="14" selection-start="6889" selection-end="6889" vertical-scroll-proportion="0.7379768" /> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/galaxy/bs_seeker2_wrapper.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="116" column="0" selection-start="4938" selection-end="4938" vertical-scroll-proportion="1.3648424" /> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/galaxy/bs_seeker2_wrapper.xml"> + <provider selected="true" editor-type-id="text-editor"> + <state line="2" column="40" selection-start="188" selection-end="188" vertical-scroll-proportion="-0.5970149" /> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_align/output.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="53" column="25" selection-start="2172" selection-end="2172" vertical-scroll-proportion="0.9339775" /> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_align/bs_align_utils.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="390" column="21" selection-start="12695" selection-end="12695" vertical-scroll-proportion="0.59731543" /> + </provider> + </entry> + <entry file="file:///System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/fileinput.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="90" column="4" selection-start="3827" selection-end="3827" vertical-scroll-proportion="0.28391168" /> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_index/wg_build.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="7" column="37" selection-start="268" selection-end="268" vertical-scroll-proportion="-0.70347005"> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_seeker2-build.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="15" column="102" selection-start="651" selection-end="651" vertical-scroll-proportion="0.19323671"> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_align/bs_single_end.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="73" column="19" selection-start="2782" selection-end="2789" vertical-scroll-proportion="0.9328859"> + <folding> + <element signature="e#0#40#0" expanded="true" /> + </folding> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_align/bs_rrbs.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="621" column="50" selection-start="27893" selection-end="27908" vertical-scroll-proportion="0.33221477"> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_align/bs_pair_end.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="734" column="76" selection-start="31798" selection-end="31798" vertical-scroll-proportion="20.067114"> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/RELEASE_NOTES"> + <provider selected="true" editor-type-id="text-editor"> + <state line="23" column="0" selection-start="1006" selection-end="1006" vertical-scroll-proportion="0.6845426"> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_seeker2-call_methylation.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="135" column="30" selection-start="5381" selection-end="5404" vertical-scroll-proportion="-1.1900162"> + <folding> + <element signature="e#23#69#0" expanded="true" /> + </folding> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_seeker2-align.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="57" column="122" selection-start="5501" selection-end="5501" vertical-scroll-proportion="0.7085346"> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/README.md"> + <provider selected="true" editor-type-id="text-editor"> + <state line="77" column="25" selection-start="2888" selection-end="2895" vertical-scroll-proportion="-7.92"> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/bs_utils/utils.py"> + <provider selected="true" editor-type-id="text-editor"> + <state line="14" column="0" selection-start="176" selection-end="176" vertical-scroll-proportion="0.16103059"> + <folding /> + </state> + </provider> + </entry> + </component> +</project> +
--- a/BSseeker2/FilterReads.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/FilterReads.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import sys import os @@ -226,7 +226,7 @@ (options, args) = parser.parse_args() if (options.infile is None) or (options.outfile is None) : - print parser.print_help() + parser.print_help() exit(-1) FilterReads(options.infile, options.outfile, options.keepquality)
--- a/BSseeker2/README.md Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/README.md Tue Nov 05 01:55:39 2013 -0500 @@ -48,9 +48,10 @@ * Linux or Mac OS platform * One of the following Aligner - - [bowtie](http://bowtie-bio.sourceforge.net/) - - [bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/) (Recommend) + - [bowtie](http://bowtie-bio.sourceforge.net/) (fast) + - [bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/) (Default) - [soap](http://soap.genomics.org.cn/) + - [rmap](http://www.cmb.usc.edu/people/andrewds/rmap/) * [Python](http://www.python.org/download/) (Version 2.6 +) (It is normally pre-installed in Linux. Type " python -V" to see the installed version.) @@ -74,7 +75,7 @@ $ python FilterReads.py Usage: FilterReads.py -i <input> -o <output> [-k] - Author : Guo, Weilong; guoweilong@gmail.com; 2012-11-10 + Author : Guo, Weilong; 2012-11-10 Unique reads for qseq/fastq/fasta/sequencce, and filter low quality file in qseq file. @@ -105,16 +106,16 @@ -h, --help show this help message and exit -f FILE, --file=FILE Input your reference genome file (fasta) --aligner=ALIGNER Aligner program to perform the analysis: bowtie, - bowtie2, soap [Default: bowtie2] - -p PATH, --path=PATH Path to the aligner program. Defaults: - bowtie: /u/home/mcdb/weilong/install/bowtie-0.12.8 - bowtie2: - /u/home/mcdb/weilong/install/bowtie2-2.0.0-beta7 - soap: /u/home/mcdb/weilong/install/soap2.21release/ + bowtie2, soap, rmap [Default: bowtie] + -p PATH, --path=PATH Path to the aligner program. Detected: + bowtie: ~/install/bowtie + bowtie2: ~/install/bowtie2 + rmap: ~/install/rmap_/bin + soap: ~/install/soap/ -d DBPATH, --db=DBPATH Path to the reference genome library (generated in - preprocessing genome) [Default: /u/home/mcdb/weilong/i - nstall/BSseeker2/bs_utils/reference_genomes] + preprocessing genome) [Default: ~/install + /BSseeker2/bs_utils/reference_genomes] -v, --version show version of BS-Seeker2 Reduced Representation Bisulfite Sequencing Options: @@ -125,7 +126,7 @@ certain fragments will be masked. [Default: False] -l LOW_BOUND, --low=LOW_BOUND lower bound of fragment length (excluding recognition - sequence such as C-CGG) [Default: 40] + sequence such as C-CGG) [Default: 20] -u UP_BOUND, --up=UP_BOUND upper bound of fragment length (excluding recognition sequence such as C-CGG ends) [Default: 500] @@ -134,6 +135,7 @@ Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: C-CGG] + ##Example * Build genome index for WGBS using bowtie, path of bowtie should be included in $PATH @@ -150,7 +152,7 @@ * Build genome index for RRBS library for double-enzyme : MspI (C-CGG) & ApeKI (G-CWGC, where W=A|T, see [IUPAC code](http://www.bioinformatics.org/sms/iupac.html)) - python bs_seeker2-build.py -f genome.fa -r -c C-CGG,G-CWGC + python bs_seeker2-build.py -f genome.fa -r -c C-CGG,G-CWGC --aligner=bowtie ##Tips: @@ -172,75 +174,73 @@ ##Usage : $ python ~/install/BSseeker2/bs_seeker2-align.py -h - Usage: bs_seeker2-align.py [options] + Usage: bs_seeker2-align.py {-i <single> | -1 <mate1> -2 <mate2>} -g <genome.fa> [options] Options: -h, --help show this help message and exit For single end reads: -i INFILE, --input=INFILE - Input your read file name (FORMAT: sequences, - fastq, qseq,fasta) + Input read file (FORMAT: sequences, qseq, fasta, + fastq). Ex: read.fa or read.fa.gz For pair end reads: -1 FILE, --input_1=FILE - Input your read file end 1 (FORMAT: sequences, - qseq, fasta, fastq) + Input read file, mate 1 (FORMAT: sequences, qseq, + fasta, fastq) -2 FILE, --input_2=FILE - Input your read file end 2 (FORMAT: sequences, - qseq, fasta, fastq) - --minins=MIN_INSERT_SIZE + Input read file, mate 2 (FORMAT: sequences, qseq, + fasta, fastq) + -I MIN_INSERT_SIZE, --minins=MIN_INSERT_SIZE The minimum insert size for valid paired-end - alignments [Default: -1] - --maxins=MAX_INSERT_SIZE + alignments [Default: 0] + -X MAX_INSERT_SIZE, --maxins=MAX_INSERT_SIZE The maximum insert size for valid paired-end - alignments [Default: 400] + alignments [Default: 500] Reduced Representation Bisulfite Sequencing Options: - -r, --rrbs Process reads from Reduced Representation Bisulfite - Sequencing experiments + -r, --rrbs Map reads to the Reduced Representation genome -c pattern, --cut-site=pattern Cutting sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). + [Default: C-CGG] -L RRBS_LOW_BOUND, --low=RRBS_LOW_BOUND - lower bound of fragment length (excluding C-CGG ends) - [Default: 40] + Lower bound of fragment length (excluding C-CGG ends) + [Default: 20] -U RRBS_UP_BOUND, --up=RRBS_UP_BOUND - upper bound of fragment length (excluding C-CGG ends) + Upper bound of fragment length (excluding C-CGG ends) [Default: 500] General options: -t TAG, --tag=TAG [Y]es for undirectional lib, [N]o for directional [Default: N] -s CUTNUMBER1, --start_base=CUTNUMBER1 - The first base of your read to be mapped [Default: 1] + The first cycle of the read to be mapped [Default: 1] -e CUTNUMBER2, --end_base=CUTNUMBER2 - The last cycle number of your read to be mapped - [Default: 200] + The last cycle of the read to be mapped [Default: 200] -a FILE, --adapter=FILE Input text file of your adaptor sequences (to be - trimed from the 3'end of the reads). Input 1 seq for - dir. lib., 2 seqs for undir. lib. One line per - sequence + trimmed from the 3'end of the reads, ). Input one seq + for dir. lib., twon seqs for undir. lib. One line per + sequence. Only the first 10bp will be used --am=ADAPTER_MISMATCH - Number of mismatches allowed in adaptor [Default: 1] + Number of mismatches allowed in adapter [Default: 0] -g GENOME, --genome=GENOME - Name of the reference genome (the same as the - reference genome file in the preprocessing step) [ex. - chr21_hg18.fa] - -m INT_NO_MISMATCHES, --mismatches=INT_NO_MISMATCHES + Name of the reference genome (should be the same as + "-f" in bs_seeker2-build.py ) [ex. chr21_hg18.fa] + -m NO_MISMATCHES, --mismatches=NO_MISMATCHES Number of mismatches in one read [Default: 4] - --aligner=ALIGNER Aligner program to perform the analisys: bowtie, - bowtie2, soap [Default: bowtie2] + --aligner=ALIGNER Aligner program for short reads mapping: bowtie, + bowtie2, soap, rmap [Default: bowtie] -p PATH, --path=PATH - Path to the aligner program. Defaults: - bowtie: /u/home/mcdb/weilong/install/bowtie-0.12.8 - bowtie2: - /u/home/mcdb/weilong/install/bowtie2-2.0.0-beta7 - soap: /u/home/mcdb/weilong/soap2.21release/ + Path to the aligner program. Detected: + bowtie: ~/install/bowtie + bowtie2: ~/install/bowtie2 + rmap: ~/install/rmap/bin + soap: ~/install/soap/ -d DBPATH, --db=DBPATH Path to the reference genome library (generated in - preprocessing genome) [Default: /u/home/mcdb/weilong/i + preprocessing genome) [Default: ~/i nstall/BSseeker2/bs_utils/reference_genomes] -l NO_SPLIT, --split_line=NO_SPLIT Number of lines per split (the read file will be split @@ -251,7 +251,7 @@ -f FORMAT, --output-format=FORMAT Output format: bam, sam, bs_seeker1 [Default: bam] --no-header Suppress SAM header lines [Default: False] - --temp_dir=PATH The path to your temporary directory [Default: /tmp] + --temp_dir=PATH The path to your temporary directory [Detected: /tmp] --XS=XS_FILTER Filter definition for tag XS, format X,Y. X=0.8 and y=5 indicate that for one read, if #(mCH sites)/#(all CH sites)>0.8 and #(mCH sites)>5, then tag XS=1; or @@ -263,31 +263,51 @@ Aligner Options: You may specify any additional options for the aligner. You just have to prefix them with --bt- for bowtie, --bt2- for bowtie2, --soap- for - soap, and BS Seeker will pass them on. For example: --bt-p 4 will - increase the number of threads for bowtie to 4, --bt--tryhard will - instruct bowtie to try as hard as possible to find valid alignments - when they exist, and so on. Be sure that you know what you are doing - when using these options! Also, we don't do any validation on the - values. - + soap, --rmap- for rmap, and BS Seeker will pass them on. For example: + --bt-p 4 will increase the number of threads for bowtie to 4, --bt-- + tryhard will instruct bowtie to try as hard as possible to find valid + alignments when they exist, and so on. Be sure that you know what you + are doing when using these options! Also, we don't do any validation + on the values. ##Examples : -* Align from fasta format with bowtie2 (local alignment) for whole genome, allowing 3 mismatches +* WGBS library ; alignment mode, bowtie ; map to WGBS index + + python bs_seeker2-align.py -i WGBS.fa --aligner=bowtie -o WGBS.bam -f bam -g genome.fa + +* WGBS library ; alignment mode, bowtie2-local ; map to WGBS index - python bs_seeker2-align.py -i WGBS.fa -m 3 --aligner=bowtie2 -o WGBS.bam -f bam -g genome.fa + python bs_seeker2-align.py -i WGBS.fa --aligner=bowtie2 -o WGBS.bam -f bam -g genome.fa + +* WGBS library ; alignment mode, bowtie2-end-to-end ; map to WGBS index -* Align from qseq format for RRBS with bowtie, default parameters for RRBS fragments + python bs_seeker2-align.py -i WGBS.fa -m 3 --aligner=bowtie2 -o WGBS.bam -f bam -g genome.fa --bt2--end-to-end + +* RRBS library ; alignment mode, bowtie ; map to RR index - python bs_seeker2-align.py -i RRBS.fa --aligner=bowtie -o RRBS.sam -f sam -g genome.fa -r -a adapter.txt + python bs_seeker2-align.py -i RRBS.fa --aligner=bowtie -o RRBS.bam -g genome.fa -r -a adapter.txt + +* RRBS library ; alignment mode, bowtie ; map to WG index + + python bs_seeker2-align.py -i RRBS.fa --aligner=bowtie -o RRBS.bam -g genome.fa -a adapter.txt -* Align from qseq format for RRBS with bowtie (end-to-end), specifying lengths of fragments ranging [40bp, 400bp] +* RRBS library ; alignment mode, bowtie2-end-to-end ; map to WG index + + python bs_seeker2-align.py -i RRBS.fa --aligner=bowtie -o RRBS.bam -g genome.fa -a adapter.txt --bt2--end-to-end - python bs_seeker2-align.py -i RRBS.qseq --aligner=bowtie2 --bt2--end-to-end -o RRBS.bam -f bam -g genome.fa -r --low=40 --up=400 -a adapter.txt +* Align from qseq format for RRBS with bowtie, specifying lengths of fragments ranging [40bp, 400bp] + + python bs_seeker2-align.py -i RRBS.qseq --aligner=bowtie -o RRBS.bam -f bam -g genome.fa -r --low=40 --up=400 -a adapter.txt The parameters '--low' and '--up' should be the same with corresponding parameters when building the genome index +* WGBS library ; alignment mode, bowtie ; map to WGBS index; use 8 threads for alignment + + python bs_seeker2-align.py -i WGBS.fa --aligner=bowtie -o WGBS.bam -f bam -g genome.fa --bt-p 4 + +BS-Seeker2 will run TWO bowtie instances in parallel. ##Input file: @@ -341,6 +361,10 @@ For example, if 95% of reads come from fragments with length range [50bp, 250bp], you'd better choose [40bp, 300bp]. +- Fewer mismatches for the 'local alignment' mode. + + As the 'local alignment', the bad sequenced bases are usually trimmed, and would not be considered by the parameter "-m". + It is suggested to user fewer mismatches for the 'local alignment' mode. (3) bs_seeker2-call_methylation.py ------------ @@ -351,16 +375,14 @@ ##Usage: $ python bs_seeker2-call_methylation.py -h - Usage: bs_seeker2-call_methylation.py [options] - Options: -h, --help show this help message and exit -i INFILE, --input=INFILE BAM output from bs_seeker2-align.py -d DBPATH, --db=DBPATH Path to the reference genome library (generated in - preprocessing genome) [Default: /u/home/mcdb/weilong/i - nstall/BSseeker2/bs_utils/reference_genomes] + preprocessing genome) [Default: ~/install + /BSseeker2/bs_utils/reference_genomes] -o OUTFILE, --output-prefix=OUTFILE The output prefix to create ATCGmap and wiggle files [INFILE] @@ -370,6 +392,7 @@ -x, --rm-SX Removed reads with tag 'XS:i:1', which would be considered as not fully converted by bisulfite treatment [Default: False] + --txt Show CGmap and ATCGmap in .gz [Default: False] -r READ_NO, --read-no=READ_NO The least number of reads covering one site to be shown in wig file [Default: 1] @@ -430,9 +453,9 @@ (3) position (4) context (CG/CHG/CHH) (5) dinucleotide-context (CA/CC/CG/CT) - (6) methyltion-level = #-of-C / (#-of-C + #-of-T) - (7) #-of-C (methylated) - (8) (#-ofC + #-of-T) (all cytosines) + (6) methylation-level = #_of_C / (#_of_C + #_of_T). + (7) #_of_C (methylated C, the count of reads showing C here) + (8) = #_of_C + #_of_T (all Cytosines, the count of reads showing C or T here) - ATCGmap file @@ -442,7 +465,7 @@ chr1 T 3009410 -- -- 0 10 0 0 0 0 0 0 0 0 na chr1 C 3009411 CHH CC 0 10 0 0 0 0 0 0 0 0 0.0 chr1 C 3009412 CHG CC 0 10 0 0 0 0 0 0 0 0 0.0 - chr1 C 3009413 CG CG 0 10 50 0 0 0 0 0 0 0 0.833333333333 + chr1 C 3009413 CG CG 0 10 50 0 0 0 0 0 0 0 0.83 Format descriptions: @@ -467,14 +490,15 @@ (14) # of reads from Crick strand mapped here, support G on Watson strand and C on Crick strand (15) # of reads from Crick strand mapped here, support N - (16) methylation_level = #C/(#C+#T) = (C8+C14)/(C7+C8+C11+C14); "nan" means none reads support C/T at this position. + (16) methylation_level = #C/(#C+#T) = C8/(C7+C8) for Watson strand, =C14/(C11+C14) for Crick strand; + "nan" means none reads support C/T at this position. Contact Information ============ -If you still have questions on BS-Seeker 2, or you find bugs when using BS-Seeker 2, or you have suggestions, please write email to guoweilong@gmail.com (Weilong Guo). +If you still have questions on BS-Seeker 2, or you find bugs when using BS-Seeker 2, or you have suggestions, please write email to [Weilong Guo](guoweilong@gmail.com). @@ -556,7 +580,7 @@ cd BSseeker2-master/ -(4)Run BS-Seeker2 +(4)Run BS-Seeker2 Q: Can I add the path of BS-Seeker2's *.py to the $PATH, so I can call BS-Seeker2 from anywhere? @@ -585,6 +609,69 @@ bs_seeker-align.py -h bs_seeker-call_methylation.py -h +(5) Unique alignment + +Q: If I want to only keep alignments that map uniquely, is this an argument I should supply directly +to Bowtie2 (via BS Seeker 2's command line option), or is this an option that's available in +BS Seeker 2 itself? + +A: BS-Seeker2 reports unique alignment by default already. If you want to know how many reads +could have multiple hits, run BS-Seeker2 with parameter "--multiple-hit". + + +(6) Output + +Q: In CGmap files, why some lines shown "--" but not a motif (CG/CHG/CHH), for example: + + chr01 C 4303711 -- CC 0.0 0 2 + chr01 C 4303712 -- CN 0.0 0 2 + +A: In this example, the site 4303713 is "N" in genome, thus we could not decide the explict pattern. + +(7) Algorithm to remove the adapter. + +Q: What's the algorithm to remove the adapter + +A: BS-Seeker2 has built-in algorithm for removing the adapter, which is developed by [Weilong Guo](http://bioinfo.au.tsinghua.edu.cn/member/wguo/index.html). + + First, if the adapter was provided as "AGATCGGAAGAGCACACGTC", only the first 10bp would be used. + Second, we use semi-local alignment strategy for removing the adapters. + Exmaple: + + Read : ACCGCGTTGATCGAGTACGTACGTGGGTC + Adapter : ....................ACGTGGGTCCCG + + no_mismatch : the maximum number allowed for mismatches + + Algorithm: (allowing 1 mismatch) + -Step 1: + ACCGCGTTGATCGAGTACGTACGTGGGTC + ||XX + ACGTGGGTCCCG + -Step 2: + ACCGCGTTGATCGAGTACGTACGTGGGTC + X||X + .ACGTGGGTCCCG + -Step 3: + ACCGCGTTGATCGAGTACGTACGTGGGTC + XX + ..ACGTGGGTCCCG + -Step ... + -Step N: + ACCGCGTTGATCGAGTACGTACGTGGGTC + ||||||||| + ....................ACGTGGGTCCCG + Success & return! + + Third, we also removed the synthesized bases at the end of RRBS fragments. + Take the "C-CGG" cutting site as example, + + - - C|U G G - - =>cut=> - - C =>add=> - - C|C G =>sequencing + - - G G C|C - - - - G G C - - G G C + + In our algorithm, the "CG" in "--CCG" (upper strand) was trimmed, in order to get accurate methyaltion level. + +
--- a/BSseeker2/RELEASE_NOTES Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/RELEASE_NOTES Tue Nov 05 01:55:39 2013 -0500 @@ -3,5 +3,25 @@ 1. Fix bug in utils.py 2. Add UPDATE file +v2.0.4 - July 29, 2013 +1. Update README.md (Definition of ATCGmap format) +2. Fix the "None" output when no parameter +3. Fix error report when using wrong path for short reads aligner +4. MIT License + +V2.0.5 - Nov 31, 2013 +1. Fix bug in "bs_single_end.py" for "numbers_premapped_lst[0] += len(Unique_FW_C2T)" +2. Make the default aligner to Bowtie +3. Update the README file +4. "-r N" in call-methylation will only control Wiggle file +5. Change the output of pair-end mapping to the standard format of SAM file (the QNAME and FLAG fields) +6. The artificially synthesized bases at the ends of RRBS fragments are removed when removing the adapters +7. Add the total number of bases of uniquely mapped reads to the STDOUT +8. Support "end-to-end" alignment mode in the Galaxy version +9. Support input file in both TEXT or Compressed (.gz) format +10. "-m" allow both INT and FLOAT as input. Ex: '-m 5' or '-m 0.05' +11. Add mode information in output + +
--- a/BSseeker2/bs_align/bs_align_utils.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/bs_align_utils.py Tue Nov 05 01:55:39 2013 -0500 @@ -54,9 +54,15 @@ """ -def RemoveAdapter ( read, adapter, no_mismatch ) : +# Remove the adapter from 3' end +def RemoveAdapter ( read, adapter, no_mismatch, rm_back=0) : lr = len(read) la = len(adapter) + # Check the empty adapter, namely, the reads start with the 2nd base of adapter, + # not including the 'A' base in front of the adapter. + if adapter[2:] == read[0:(la-1)] : + return "" + for i in xrange( lr - no_mismatch ) : read_pos = i adapter_pos = 0 @@ -74,8 +80,14 @@ adapter_pos = adapter_pos + 1 # while_end + # Cut the extra bases before the adapter + # --C|CG G-- => --CNN+A+<adapter> + # --G GC|C-- --GGC if adapter_pos == la or read_pos == lr : - return read[:i] + if i <= rm_back : + return '' + else : + return read[:(i-rm_back)] # for_end return read
--- a/BSseeker2/bs_align/bs_pair_end.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/bs_pair_end.py Tue Nov 05 01:55:39 2013 -0500 @@ -104,13 +104,13 @@ aligner_command, db_path, tmp_path, - outfile, XS_pct, XS_count, show_multiple_hit=False): + outfile, XS_pct, XS_count, adapter_mismatch, show_multiple_hit=False): #---------------------------------------------------------------- - adapter="" - adapterA="" - adapterB="" + adapter = "" + adapterA = "" + adapterB = "" if adapter_file !="": try : adapter_inf=open(adapter_file,"r") @@ -134,12 +134,15 @@ logm("End 1 filename: %s"% main_read_file_1 ) logm("End 2 filename: %s"% main_read_file_2 ) logm("The first base (for mapping): %d"% cut1 ) - logm("The last base (for mapping): %d"% cut2 ) + logm("The last base (for mapping): %d"% cut2 ) - logm("-------------------------------- " ) - logm("Un-directional library: %s" % asktag ) logm("Path for short reads aligner: %s"% aligner_command + '\n') logm("Reference genome library path: %s"% db_path ) + + if asktag == "Y" : + logm("Un-directional library" ) + else : + logm("Directional library") logm("Number of mismatches allowed: %s"% max_mismatch_no ) if adapter_file !="": @@ -182,6 +185,9 @@ all_trimmed=0 all_mapped=0 all_mapped_passed=0 + all_base_before_trim=0 + all_base_after_trim=0 + all_base_mapped=0 all_unmapped=0 @@ -192,6 +198,8 @@ uC_lst=[0,0,0] no_my_files=0 + read_id_lst_1 = dict() + read_id_lst_2 = dict() #---------------------------------------------------------------- print "== Start mapping ==" @@ -212,11 +220,14 @@ #---------------------------------------------------------------- outfile_1FCT = tmp_d('Trimmed_FCT_1.fa'+random_id) outfile_1RCT = tmp_d('Trimmed_RCT_1.fa'+random_id) - outfile_2FCT = tmp_d('Trimmed_FCT_2.fa'+random_id) + outfile_2RGA = tmp_d('Trimmed_FCT_2.fa'+random_id) outfile_2RCT = tmp_d('Trimmed_RCT_2.fa'+random_id) try : - read_inf = open(tmp_d(read_file_1),"r") + if read_file_1.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(tmp_d(read_file_1), "rb") + else : + read_inf = open(tmp_d(read_file_1),"r") except IOError : print "[Error] Cannot open file : %s", tmp_d(read_file_1) exit(-1) @@ -226,128 +237,131 @@ if oneline[0]=="@": # Illumina GAII FastQ (Lister et al Nature 2009) input_format="fastq" - n_fastq=0 elif len(l)==1 and oneline[0]!=">": # pure sequences input_format="seq" elif len(l)==11: # Illumina GAII qseq file input_format="qseq" elif oneline[0]==">": # fasta input_format="fasta" - n_fasta=0 read_inf.close() print "Detected data format: %s" % input_format #---------------------------------------------------------------- read_file_list = [read_file_1, read_file_2] - outfile_FCT_list = [outfile_1FCT, outfile_2FCT] + outfile_FCT_list = [outfile_1FCT, outfile_2RGA] outfile_RCT_list = [outfile_1RCT, outfile_2RCT] n_list = [0, 0] - - for f in range(2): read_file = read_file_list[f] outf_FCT = open(outfile_FCT_list[f], 'w') - outf_RCT = open(outfile_RCT_list[f], 'w') + outf_RGA = open(outfile_RCT_list[f], 'w') original_bs_reads = original_bs_reads_lst[f] n = n_list[f] - - seq_id = "" + read_id = "" seq = "" seq_ready = "N" + line_no = 0 for line in fileinput.input(tmp_d(read_file)): - l=line.split() + l = line.split() + line_no += 1 if input_format=="seq": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[0] - seq_ready="Y" + n += 1 + read_id = str(n) + read_id = read_id.zfill(12) + if f == 1 : + read_id_lst_1[read_id] = read_id + else : + read_id_lst_2[read_id] = read_id + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq==1 : + all_raw_reads += 1 + read_id = str(line_no/4+1).zfill(12) + if f==1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq_ready="N" + elif l_fastq==2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[8] - seq_ready="Y" + all_raw_reads += 1 + read_id = str(line_no) + read_id = read_id.zfill(12) + if f == 1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq = l[8] + seq_ready = "Y" elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - n+=1 - seq_id=l[0][1:] - seq_id=seq_id.zfill(17) - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fasta = math.fmod(line_no,2) + seq_ready = "N" + if l_fasta==1: + all_raw_reads += 1 + read_id = str(line_no/2+1).zfill(12) + if f==1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq = "" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" #---------------------------------------------------------------- if seq_ready=="Y": - seq=seq[cut1-1:cut2] #------- selecting 0..52 from 1..72 -e 52 - seq=seq.upper() - seq=seq.replace(".","N") + seq = seq[cut1-1:cut2] #------- selecting 0..52 from 1..72 -e 52 + seq = seq.upper() + seq = seq.replace(".","N") #--striping BS adapter from 3' read -------------------------------------------------------------- - if (adapterA !="") and (adapterB !=""): - signature=adapterA[:6] - if signature in seq: - signature_pos=seq.index(signature) - if seq[signature_pos:] in adapterA: - seq=seq[:signature_pos]#+"".join(["N" for x in range(len(seq)-len(signature_pos))]) - all_trimmed+=1 - else: - signature=adapterB[:6] - if signature in seq: - #print seq_id,seq,signature; - signature_pos=seq.index(signature) - if seq[signature_pos:] in adapterB: - seq=seq[:signature_pos]#+"".join(["N" for x in range(len(seq)-len(signature_pos))]) - all_trimmed+=1 + all_base_before_trim += len(seq) + if f==0 and adapterA!="" : + new_read = RemoveAdapter(seq, adapterA, adapter_mismatch) + if len(new_read)<len(seq) : + all_trimmed += 1 + seq = new_read + if f==1 and adapterB!="" : + new_read = RemoveAdapter(seq, adapterB, adapter_mismatch) + if len(new_read)<len(seq) : + all_trimmed += 1 + seq = new_read + all_base_after_trim += len(seq) - if len(seq) <= 4: + if len(seq)<=4: seq = "N" * (cut2-cut1+1) #--------- trimmed_raw_BS_read ------------------ - original_bs_reads[seq_id] = seq + original_bs_reads[read_id] = seq #--------- FW_C2T ------------------ - outf_FCT.write('>%s\n%s\n' % (seq_id, seq.replace("C","T"))) + outf_FCT.write('>%s\n%s\n' % (read_id, seq.replace("C","T"))) #--------- RC_G2A ------------------ - outf_RCT.write('>%s\n%s\n' % (seq_id, seq.replace("G","A"))) + outf_RGA.write('>%s\n%s\n' % (read_id, seq.replace("G","A"))) - n_list[f]=n + n_list[f] = n outf_FCT.close() - outf_RCT.close() + outf_RGA.close() fileinput.close() - #print "All input end 1: %d , end 2: %d "%(n_list[0],n_list[1]); - all_raw_reads+=n + all_raw_reads += n #-------------------------------------------------------------------------------- # Bowtie mapping #-------------------------------------------------------------------------------- - WC2T_fr=tmp_d("W_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) - WC2T_rf=tmp_d("W_C2T_rf_m"+max_mismatch_no+".mapping"+random_id) - CC2T_fr=tmp_d("C_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) - CC2T_rf=tmp_d("C_C2T_rf_m"+max_mismatch_no+".mapping"+random_id) + WC2T_fr = tmp_d("W_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) + WC2T_rf = tmp_d("W_C2T_rf_m"+max_mismatch_no+".mapping"+random_id) + CG2A_fr = tmp_d("C_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) + CC2T_rf = tmp_d("C_C2T_rf_m"+max_mismatch_no+".mapping"+random_id) run_in_parallel([aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'), 'input_file_1' : outfile_1FCT, @@ -357,64 +371,62 @@ aligner_command % {'reference_genome' : os.path.join(db_path,'C_C2T'), 'input_file_1' : outfile_1FCT, 'input_file_2' : outfile_2RCT, - 'output_file' : CC2T_fr}, + 'output_file' : CG2A_fr}, aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'), - 'input_file_1' : outfile_2FCT, + 'input_file_1' : outfile_2RGA, 'input_file_2' : outfile_1RCT, 'output_file' : WC2T_rf}, aligner_command % {'reference_genome' : os.path.join(db_path,'C_C2T'), - 'input_file_1' : outfile_2FCT, + 'input_file_1' : outfile_2RGA, 'input_file_2' : outfile_1RCT, 'output_file' : CC2T_rf}]) - - delete_files(outfile_1FCT, outfile_2FCT, outfile_1RCT, outfile_2RCT) + delete_files(outfile_1FCT, outfile_2RGA, outfile_1RCT, outfile_2RCT) #-------------------------------------------------------------------------------- # Post processing #-------------------------------------------------------------------------------- - FW_C2T_fr_U, FW_C2T_fr_R = extract_mapping(WC2T_fr) FW_C2T_rf_U, FW_C2T_rf_R = extract_mapping(WC2T_rf) - RC_C2T_fr_U, RC_C2T_fr_R = extract_mapping(CC2T_fr) + RC_G2A_fr_U, RC_G2A_fr_R = extract_mapping(CG2A_fr) RC_C2T_rf_U, RC_C2T_rf_R = extract_mapping(CC2T_rf) - delete_files(WC2T_fr, WC2T_rf, CC2T_fr, CC2T_rf) + delete_files(WC2T_fr, WC2T_rf, CG2A_fr, CC2T_rf) #---------------------------------------------------------------- # get uniq-hit reads #---------------------------------------------------------------- - Union_set=set(FW_C2T_fr_U.iterkeys()) | set(FW_C2T_rf_U.iterkeys()) | set(RC_C2T_fr_U.iterkeys()) | set(RC_C2T_rf_U.iterkeys()) + Union_set = set(FW_C2T_fr_U.iterkeys()) | set(FW_C2T_rf_U.iterkeys()) | set(RC_G2A_fr_U.iterkeys()) | set(RC_C2T_rf_U.iterkeys()) - Unique_FW_fr_C2T=set() # + - Unique_FW_rf_C2T=set() # + - Unique_RC_fr_C2T=set() # - - Unique_RC_rf_C2T=set() # - - Multiple_hits=set() + Unique_FW_fr_C2T = set() # + + Unique_FW_rf_C2T = set() # + + Unique_RC_fr_G2A = set() # - + Unique_RC_rf_C2T = set() # - + Multiple_hits = set() for x in Union_set: - _list=[] - for d in [FW_C2T_fr_U, FW_C2T_rf_U, RC_C2T_fr_U, RC_C2T_rf_U]: - mis_lst=d.get(x,[99]) - mis=int(mis_lst[0]) + _list = [] + for d in [FW_C2T_fr_U, FW_C2T_rf_U, RC_G2A_fr_U, RC_C2T_rf_U]: + mis_lst = d.get(x,[99]) + mis = int(mis_lst[0]) _list.append(mis) - for d in [FW_C2T_fr_R, FW_C2T_rf_R, RC_C2T_fr_R, RC_C2T_rf_R]: - mis=d.get(x,99) + for d in [FW_C2T_fr_R, FW_C2T_rf_R, RC_G2A_fr_R, RC_C2T_rf_R]: + mis = d.get(x,99) _list.append(mis) - mini=min(_list) + mini = min(_list) if _list.count(mini)==1: - mini_index=_list.index(mini) + mini_index = _list.index(mini) if mini_index==0: Unique_FW_fr_C2T.add(x) elif mini_index==1: Unique_FW_rf_C2T.add(x) elif mini_index==2: - Unique_RC_fr_C2T.add(x) + Unique_RC_fr_G2A.add(x) elif mini_index==3: Unique_RC_rf_C2T.add(x) else : @@ -424,7 +436,7 @@ # write reads rejected by Multiple Hits to file if show_multiple_hit : - outf_MH=open("Multiple_hit.fa",'w') + outf_MH = open("Multiple_hit.fa",'w') for i in Multiple_hits : outf_MH.write(">%s\n" % i) outf_MH.write("%s\n" % original_bs_reads[i]) @@ -433,32 +445,32 @@ del Union_set del FW_C2T_fr_R del FW_C2T_rf_R - del RC_C2T_fr_R + del RC_G2A_fr_R del RC_C2T_rf_R - FW_C2T_fr_uniq_lst=[[FW_C2T_fr_U[u][1],u] for u in Unique_FW_fr_C2T] - FW_C2T_rf_uniq_lst=[[FW_C2T_rf_U[u][1],u] for u in Unique_FW_rf_C2T] - RC_C2T_fr_uniq_lst=[[RC_C2T_fr_U[u][1],u] for u in Unique_RC_fr_C2T] - RC_C2T_rf_uniq_lst=[[RC_C2T_rf_U[u][1],u] for u in Unique_RC_rf_C2T] + FW_C2T_fr_uniq_lst = [[FW_C2T_fr_U[u][1],u] for u in Unique_FW_fr_C2T] + FW_C2T_rf_uniq_lst = [[FW_C2T_rf_U[u][1],u] for u in Unique_FW_rf_C2T] + RC_C2T_fr_uniq_lst = [[RC_G2A_fr_U[u][1],u] for u in Unique_RC_fr_G2A] + RC_C2T_rf_uniq_lst = [[RC_C2T_rf_U[u][1],u] for u in Unique_RC_rf_C2T] FW_C2T_fr_uniq_lst.sort() FW_C2T_rf_uniq_lst.sort() RC_C2T_fr_uniq_lst.sort() RC_C2T_rf_uniq_lst.sort() - FW_C2T_fr_uniq_lst=[x[1] for x in FW_C2T_fr_uniq_lst] - FW_C2T_rf_uniq_lst=[x[1] for x in FW_C2T_rf_uniq_lst] - RC_C2T_fr_uniq_lst=[x[1] for x in RC_C2T_fr_uniq_lst] - RC_C2T_rf_uniq_lst=[x[1] for x in RC_C2T_rf_uniq_lst] + FW_C2T_fr_uniq_lst = [x[1] for x in FW_C2T_fr_uniq_lst] + FW_C2T_rf_uniq_lst = [x[1] for x in FW_C2T_rf_uniq_lst] + RC_C2T_fr_uniq_lst = [x[1] for x in RC_C2T_fr_uniq_lst] + RC_C2T_rf_uniq_lst = [x[1] for x in RC_C2T_rf_uniq_lst] #---------------------------------------------------------------- - numbers_premapped_lst[0]+=len(Unique_FW_fr_C2T) - numbers_premapped_lst[1]+=len(Unique_FW_rf_C2T) - numbers_premapped_lst[2]+=len(Unique_RC_fr_C2T) - numbers_premapped_lst[3]+=len(Unique_RC_rf_C2T) + numbers_premapped_lst[0] += len(Unique_FW_fr_C2T) + numbers_premapped_lst[1] += len(Unique_FW_rf_C2T) + numbers_premapped_lst[2] += len(Unique_RC_fr_G2A) + numbers_premapped_lst[3] += len(Unique_RC_rf_C2T) del Unique_FW_fr_C2T del Unique_FW_rf_C2T - del Unique_RC_fr_C2T + del Unique_RC_fr_G2A del Unique_RC_rf_C2T #---------------------------------------------------------------- @@ -466,7 +478,7 @@ nn = 0 for ali_unique_lst, ali_dic in [(FW_C2T_fr_uniq_lst,FW_C2T_fr_U), (FW_C2T_rf_uniq_lst,FW_C2T_rf_U), - (RC_C2T_fr_uniq_lst,RC_C2T_fr_U), + (RC_C2T_fr_uniq_lst,RC_G2A_fr_U), (RC_C2T_rf_uniq_lst,RC_C2T_rf_U)]: nn += 1 @@ -476,15 +488,15 @@ _, mapped_chr, mapped_location_1, cigar1, mapped_location_2, cigar2 = ali_dic[header] #------------------------------------- - if mapped_chr != mapped_chr0: - my_gseq=deserialize(db_d(mapped_chr)) - chr_length=len(my_gseq) - mapped_chr0=mapped_chr + if mapped_chr!=mapped_chr0: + my_gseq = deserialize(db_d(mapped_chr)) + chr_length = len(my_gseq) + mapped_chr0 = mapped_chr #------------------------------------- - if nn == 1 or nn == 3: + if nn==1 or nn==3 : original_BS_1 = original_bs_reads_1[header] original_BS_2 = reverse_compl_seq(original_bs_reads_2[header]) - else: + else : original_BS_1 = original_bs_reads_2[header] original_BS_2 = reverse_compl_seq(original_bs_reads_1[header]) @@ -493,83 +505,41 @@ all_mapped += 1 - if nn == 1: # FW-RC mapped to + strand: - + if nn==1 : # FW-RC mapped to + strand: FR = "+FR" - -# mapped_location_1 += 1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "+" - -# mapped_location_2 += 1 -# origin_genome_long_2 = my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1] -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "+" - - elif nn==2: # RC-FW mapped to + strand: - -# original_BS_1 = original_bs_reads_2[header] -# original_BS_2 = reverse_compl_seq(original_bs_reads_1[header]) + elif nn==2 : # RC-FW mapped to + strand: FR = "+RF" -# mapped_location_1 += 1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "+" - -# mapped_location_2 += 1 -# origin_genome_long_2 = my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1] -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "+" - - - elif nn==3: # FW-RC mapped to - strand: -# original_BS_1=original_bs_reads_1[header] -# original_BS_2=reverse_compl_seq(original_bs_reads_2[header]) - + elif nn==3 : # FW-RC mapped to - strand: FR = "-FR" mapped_location_1 = chr_length - mapped_location_1 - g_len_1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_long_1 = reverse_compl_seq(origin_genome_long_1) -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "-" - mapped_location_2 = chr_length - mapped_location_2 - g_len_2 -# origin_genome_long_2 = reverse_compl_seq(my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1 ]) -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "-" - - elif nn==4: # RC-FW mapped to - strand: -# original_BS_1=original_bs_reads_2[header] -# original_BS_2=reverse_compl_seq(original_bs_reads_1[header]) - + elif nn==4 : # RC-FW mapped to - strand: FR = "-RF" mapped_location_1 = chr_length - mapped_location_1 - g_len_1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_long_1 = reverse_compl_seq(origin_genome_long_1) -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "-" - mapped_location_2 = chr_length - mapped_location_2 - g_len_2 -# origin_genome_long_2 = reverse_compl_seq(my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1]) -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "-" - - - origin_genome_1, next_1, output_genome_1 = get_genomic_sequence(my_gseq, mapped_location_1, mapped_location_1 + g_len_1, mapped_strand_1) origin_genome_2, next_2, output_genome_2 = get_genomic_sequence(my_gseq, mapped_location_2, mapped_location_2 + g_len_2, mapped_strand_2) r_aln_1, g_aln_1 = cigar_to_alignment(cigar1, original_BS_1, origin_genome_1) r_aln_2, g_aln_2 = cigar_to_alignment(cigar2, original_BS_2, origin_genome_2) - N_mismatch_1 = N_MIS(r_aln_1, g_aln_1) #+ original_BS_length_1 - (r_end_1 - r_start_1) # mismatches in the alignment + soft clipped nucleotides N_mismatch_2 = N_MIS(r_aln_2, g_aln_2) #+ original_BS_length_2 - (r_end_2 - r_start_2) # mismatches in the alignment + soft clipped nucleotides + N_mismatch = max(N_mismatch_1, N_mismatch_2) + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch_1<=mm_no*len(original_BS_1) + and N_mismatch_2<=mm_no*len(original_BS_2)): - if max(N_mismatch_1, N_mismatch_2) <= int(max_mismatch_no) : all_mapped_passed += 1 numbers_mapped_lst[nn-1] += 1 #---- unmapped ------------------------- @@ -577,43 +547,46 @@ del original_bs_reads_2[header] #--------------------------------------- -# output_genome_1 = origin_genome_long_1[0:2] + "_" + origin_genome_1 + "_" + origin_genome_long_1[-2:] -# output_genome_2 = origin_genome_long_2[0:2] + "_" + origin_genome_2 + "_" + origin_genome_long_2[-2:] - - methy_1=methy_seq(r_aln_1, g_aln_1 + next_1) - methy_2=methy_seq(r_aln_2, g_aln_2 + next_2) + methy_1 = methy_seq(r_aln_1, g_aln_1 + next_1) + methy_2 = methy_seq(r_aln_2, g_aln_2 + next_2) mC_lst, uC_lst = mcounts(methy_1, mC_lst, uC_lst) mC_lst, uC_lst = mcounts(methy_2, mC_lst, uC_lst) - - # #---XS FILTER---------------- - #XS = 1 if "ZZZ" in methy.replace('-', '') else 0 XS_1 = 0 nCH_1 = methy_1.count('y') + methy_1.count('z') nmCH_1 = methy_1.count('Y') + methy_1.count('Z') if( (nmCH_1>XS_count) and nmCH_1/float(nCH_1+nmCH_1)>XS_pct ) : XS_1 = 1 - #XS = 1 if "ZZZ" in methy.replace('-', '') else 0 XS_2 = 0 nCH_2 = methy_2.count('y') + methy_2.count('z') nmCH_2 = methy_2.count('Y') + methy_2.count('Z') if( (nmCH_2>XS_count) and nmCH_2/float(nCH_2+nmCH_2)>XS_pct ) : XS_2 = 1 + if mapped_strand_1=='+' : + flag_1 = 67 # 1000011 : 1st read, + strand + flag_2 = 131 #10000011 : 2nd read, + strand + else : + flag_1 = 115 # 1110011 : 1st read, - strand + flag_2 = 179 # 10110011 : 2nd read, - strand - outfile.store(header, N_mismatch_1, FR, mapped_chr, mapped_strand_1, mapped_location_1, cigar1, original_BS_1, methy_1, XS_1, + outfile.store2( read_id_lst_1[header], flag_1, N_mismatch_1, FR, mapped_chr, mapped_strand_1, mapped_location_1, cigar1, original_BS_1, methy_1, XS_1, output_genome = output_genome_1, rnext = mapped_chr, pnext = mapped_location_2) - outfile.store(header, N_mismatch_2, FR, mapped_chr, mapped_strand_2, mapped_location_2, cigar2, original_BS_2, methy_2, XS_2, + all_base_mapped += len(original_BS_1) + + outfile.store2( read_id_lst_2[header], flag_2, N_mismatch_2, FR, mapped_chr, mapped_strand_2, mapped_location_2, cigar2, original_BS_2, methy_2, XS_2, output_genome = output_genome_2, rnext = mapped_chr, pnext = mapped_location_1) + all_base_mapped += len(original_BS_2) + print "--> %s %s (%d/%d) " % (read_file_1, read_file_2, no_my_files, len(my_files)) #---------------------------------------------------------------- # output unmapped pairs #---------------------------------------------------------------- - unmapped_lst=original_bs_reads_1.keys() + unmapped_lst = original_bs_reads_1.keys() unmapped_lst.sort() # for u in unmapped_lst: @@ -623,189 +596,188 @@ all_unmapped += len(unmapped_lst) + # Directional library if asktag=="N": #---------------------------------------------------------------- - outfile_1FCT= tmp_d('Trimed_FCT_1.fa'+random_id) - outfile_2FCT= tmp_d('Trimed_FCT_2.fa'+random_id) + outfile_1FCT = tmp_d('Trimed_FCT_1.fa'+random_id) + outfile_2RGA = tmp_d('Trimed_RGA_2.fa'+random_id) try : - read_inf=open(tmp_d(read_file_1),"r") + if read_file_1.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(tmp_d(read_file_1), "rb") + else : + read_inf = open(tmp_d(read_file_1),"r") except IOError : print "[Error] Cannot open file : %s", tmp_d(read_file_1) exit(-1) - oneline=read_inf.readline() - l=oneline.split() - input_format="" - - #if len(l)==5: # old solexa format - # input_format="old Solexa Seq file" + oneline = read_inf.readline() + l = oneline.split() + input_format = "" - if oneline[0]=="@": # Illumina GAII FastQ (Lister et al Nature 2009) - input_format="FastQ" - n_fastq=0 - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="_list of sequences" - elif len(l)==11: # Illumina GAII qseq file - input_format="Illumina GAII qseq file" - elif oneline[0]==">": # fasta - input_format="fasta" - n_fasta=0 + if oneline[0]=="@" : + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">" : + input_format = "seq" + elif len(l)==11 : + input_format = "qseq" + elif oneline[0]==">" : + input_format = "fasta" read_inf.close() print "Detected data format: %s" % input_format #---------------------------------------------------------------- - read_file_list=[read_file_1,read_file_2] - outfile_FCT_list=[outfile_1FCT,outfile_2FCT] - n_list=[0,0] + read_file_list = [read_file_1,read_file_2] + outfile_FCT_list = [outfile_1FCT,outfile_2RGA] + n_list = [0,0] for f in range(2): - read_file=read_file_list[f] - outf_FCT=open(outfile_FCT_list[f],'w') + read_file = read_file_list[f] + outf_FCT = open(outfile_FCT_list[f],'w') original_bs_reads = original_bs_reads_lst[f] - n=n_list[f] + n = n_list[f] - seq_id="" - seq="" - seq_ready="N" + read_id = "" + seq = "" + seq_ready = "N" + line_no = 0 for line in fileinput.input(tmp_d(read_file)): - l=line.split() - if input_format=="old Solexa Seq file": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[4] - seq_ready="Y" - elif input_format=="_list of sequences": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[0] - seq_ready="Y" - elif input_format=="FastQ": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" - elif input_format=="Illumina GAII qseq file": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[8] - seq_ready="Y" + l = line.split() + line_no += 1 + if input_format=="seq": + n += 1 + read_id = str(n) + read_id = read_id.zfill(12) + if f == 1 : + read_id_lst_1[read_id] = read_id + else : + read_id_lst_2[read_id] = read_id + seq = l[0] + seq_ready = "Y" + elif input_format=="fastq": + l_fastq = math.fmod(line_no, 4) + if l_fastq==1 : + all_raw_reads += 1 + read_id = str(line_no/4+1).zfill(12) + if f==1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq_ready = "N" + elif l_fastq==2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" + elif input_format=="qseq": + all_raw_reads += 1 + read_id = str(line_no) + read_id = read_id.zfill(12) + if f == 1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq = l[8] + seq_ready = "Y" elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - n+=1 - seq_id=l[0][1:] - seq_id=seq_id.zfill(17) - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + seq_ready = "N" + all_raw_reads += 1 + read_id = str(line_no/2+1).zfill(12) + if f == 1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq = "" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" #---------------------------------------------------------------- if seq_ready=="Y": - seq=seq[cut1-1:cut2] #<----------------------selecting 0..52 from 1..72 -e 52 - seq=seq.upper() - seq=seq.replace(".","N") + seq = seq[cut1-1:cut2] #<----------------------selecting 0..52 from 1..72 -e 52 + seq = seq.upper() + seq = seq.replace(".","N") #--striping BS adapter from 3' read -------------------------------------------------------------- - if (adapterA !="") and (adapterB !=""): - signature=adapterA[:6] - if signature in seq: - signature_pos=seq.index(signature) - if seq[signature_pos:] in adapterA: - seq=seq[:signature_pos]#+"".join(["N" for x in range(len(seq)-len(signature_pos))]) - all_trimmed+=1 - else: - signature=adapterB[:6] - if signature in seq: - #print seq_id,seq,signature; - signature_pos=seq.index(signature) - if seq[signature_pos:] in adapterB: - seq=seq[:signature_pos]#+"".join(["N" for x in range(len(seq)-len(signature_pos))]) - all_trimmed+=1 + all_base_before_trim += len(seq) + if f==0 and adapterA!="" : + new_read = RemoveAdapter(seq, adapterA, adapter_mismatch) + if len(new_read) < len(seq) : + all_trimmed += 1 + seq = new_read + if f==1 and adapterB!="" : + new_read = RemoveAdapter(seq, adapterB, adapter_mismatch) + if len(new_read)<len(seq) : + all_trimmed += 1 + seq = new_read + all_base_after_trim += len(seq) - if len(seq) <= 4: + if len(seq)<=4: seq = "N" * (cut2-cut1+1) #--------- trimmed_raw_BS_read ------------------ - original_bs_reads[seq_id] = seq + original_bs_reads[read_id] = seq #--------- FW_C2T ------------------ if f==0: - outf_FCT.write('>%s\n%s\n'% (seq_id, seq.replace("C","T"))) + outf_FCT.write('>%s\n%s\n'% (read_id, seq.replace("C","T"))) elif f==1: - outf_FCT.write('>%s\n%s\n'% (seq_id, reverse_compl_seq(seq).replace("C","T"))) + outf_FCT.write('>%s\n%s\n'% (read_id, seq.replace("G","A"))) - - n_list[f]=n + n_list[f] = n outf_FCT.close() - fileinput.close() - #print "All input end 1: %d , end 2: %d "%(n_list[0],n_list[1]); - all_raw_reads+=n + all_raw_reads += n #-------------------------------------------------------------------------------- # Bowtie mapping #-------------------------------------------------------------------------------- - WC2T_fr=tmp_d("W_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) - CC2T_fr=tmp_d("C_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) + WC2T_fr = tmp_d("W_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) + CG2A_fr = tmp_d("C_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) run_in_parallel([ aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'), 'input_file_1' : outfile_1FCT, - 'input_file_2' : outfile_2FCT, + 'input_file_2' : outfile_2RGA, 'output_file' : WC2T_fr}, aligner_command % {'reference_genome' : os.path.join(db_path,'C_C2T'), 'input_file_1' : outfile_1FCT, - 'input_file_2' : outfile_2FCT, - 'output_file' : CC2T_fr} ]) + 'input_file_2' : outfile_2RGA, + 'output_file' : CG2A_fr} ]) - delete_files(outfile_1FCT, outfile_2FCT) + delete_files(outfile_1FCT, outfile_2RGA) #-------------------------------------------------------------------------------- # Post processing #-------------------------------------------------------------------------------- FW_C2T_fr_U, FW_C2T_fr_R = extract_mapping(WC2T_fr) - RC_C2T_fr_U, RC_C2T_fr_R = extract_mapping(CC2T_fr) + RC_G2A_fr_U, RC_G2A_fr_R = extract_mapping(CG2A_fr) #---------------------------------------------------------------- # get uniq-hit reads #---------------------------------------------------------------- - Union_set = set(FW_C2T_fr_U.iterkeys()) | set(RC_C2T_fr_U.iterkeys()) + Union_set = set(FW_C2T_fr_U.iterkeys()) | set(RC_G2A_fr_U.iterkeys()) Unique_FW_fr_C2T = set() # + - Unique_RC_fr_C2T = set() # - + Unique_RC_fr_G2A = set() # - Multiple_hits=set() for x in Union_set: _list = [] - for d in [FW_C2T_fr_U, RC_C2T_fr_U]: + for d in [FW_C2T_fr_U, RC_G2A_fr_U]: mis_lst = d.get(x,[99]) mis = int(mis_lst[0]) _list.append(mis) - for d in [FW_C2T_fr_R, RC_C2T_fr_R]: + for d in [FW_C2T_fr_R, RC_G2A_fr_R]: mis = d.get(x,99) _list.append(mis) mini = min(_list) @@ -814,7 +786,7 @@ if mini_index == 0: Unique_FW_fr_C2T.add(x) elif mini_index == 1: - Unique_RC_fr_C2T.add(x) + Unique_RC_fr_G2A.add(x) else : Multiple_hits.add(x) else : @@ -822,35 +794,34 @@ # write reads rejected by Multiple Hits to file if show_multiple_hit : - outf_MH=open("Multiple_hit.fa",'w') + outf_MH = open("Multiple_hit.fa",'w') for i in Multiple_hits : outf_MH.write(">%s\n" % i) outf_MH.write("%s\n" % original_bs_reads[i]) outf_MH.close() - FW_C2T_fr_uniq_lst=[[FW_C2T_fr_U[u][1],u] for u in Unique_FW_fr_C2T] - RC_C2T_fr_uniq_lst=[[RC_C2T_fr_U[u][1],u] for u in Unique_RC_fr_C2T] + FW_C2T_fr_uniq_lst = [[FW_C2T_fr_U[u][1],u] for u in Unique_FW_fr_C2T] + RC_C2T_fr_uniq_lst = [[RC_G2A_fr_U[u][1],u] for u in Unique_RC_fr_G2A] FW_C2T_fr_uniq_lst.sort() RC_C2T_fr_uniq_lst.sort() - FW_C2T_fr_uniq_lst=[x[1] for x in FW_C2T_fr_uniq_lst] - RC_C2T_fr_uniq_lst=[x[1] for x in RC_C2T_fr_uniq_lst] + FW_C2T_fr_uniq_lst = [x[1] for x in FW_C2T_fr_uniq_lst] + RC_C2T_fr_uniq_lst = [x[1] for x in RC_C2T_fr_uniq_lst] #---------------------------------------------------------------- - numbers_premapped_lst[0]+=len(Unique_FW_fr_C2T) - numbers_premapped_lst[1]+=len(Unique_RC_fr_C2T) + numbers_premapped_lst[0] += len(Unique_FW_fr_C2T) + numbers_premapped_lst[1] += len(Unique_RC_fr_G2A) #---------------------------------------------------------------- nn = 0 - for ali_unique_lst, ali_dic in [(FW_C2T_fr_uniq_lst,FW_C2T_fr_U), (RC_C2T_fr_uniq_lst,RC_C2T_fr_U)]: + for ali_unique_lst, ali_dic in [(FW_C2T_fr_uniq_lst,FW_C2T_fr_U), (RC_C2T_fr_uniq_lst,RC_G2A_fr_U)]: nn += 1 mapped_chr0 = "" for header in ali_unique_lst: _, mapped_chr, mapped_location_1, cigar1, mapped_location_2, cigar2 = ali_dic[header] - #------------------------------------- if mapped_chr != mapped_chr0: my_gseq = deserialize(db_d(mapped_chr)) @@ -860,6 +831,7 @@ original_BS_1 = original_bs_reads_1[header] original_BS_2 = reverse_compl_seq(original_bs_reads_2[header]) + #original_BS_2 = original_bs_reads_2[header] r_start_1, r_end_1, g_len_1 = get_read_start_end_and_genome_length(cigar1) r_start_2, r_end_2, g_len_2 = get_read_start_end_and_genome_length(cigar2) @@ -868,43 +840,30 @@ if nn == 1: # FW-RC mapped to + strand: FR = "+FR" -# mapped_location_1 += 1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "+" - -# mapped_location_2 += 1 -# origin_genome_long_2 = my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1] -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "+" - elif nn == 2: # FW-RC mapped to - strand: - FR="-FR" mapped_location_1 = chr_length - mapped_location_1 - g_len_1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_long_1 = reverse_compl_seq(origin_genome_long_1) -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "-" - mapped_location_2 = chr_length - mapped_location_2 - g_len_2 -# origin_genome_long_2 = reverse_compl_seq(my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1]) -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "-" - - origin_genome_1, next_1, output_genome_1 = get_genomic_sequence(my_gseq, mapped_location_1, mapped_location_1 + g_len_1, mapped_strand_1) origin_genome_2, next_2, output_genome_2 = get_genomic_sequence(my_gseq, mapped_location_2, mapped_location_2 + g_len_2, mapped_strand_2) - r_aln_1, g_aln_1 = cigar_to_alignment(cigar1, original_BS_1, origin_genome_1) r_aln_2, g_aln_2 = cigar_to_alignment(cigar2, original_BS_2, origin_genome_2) N_mismatch_1 = N_MIS(r_aln_1, g_aln_1) #+ original_BS_length_1 - (r_end_1 - r_start_1) # mismatches in the alignment + soft clipped nucleotides N_mismatch_2 = N_MIS(r_aln_2, g_aln_2) #+ original_BS_length_2 - (r_end_2 - r_start_2) # mismatches in the alignment + soft clipped nucleotides - if max(N_mismatch_1, N_mismatch_2) <= int(max_mismatch_no): +# if max(N_mismatch_1, N_mismatch_2) <= int(max_mismatch_no): +# if N_mismatch <= int(max_mismatch_no) : + N_mismatch = max(N_mismatch_1, N_mismatch_2) + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch_1<=mm_no*len(original_BS_1) + and N_mismatch_2<=mm_no*len(original_BS_2)): numbers_mapped_lst[nn-1] += 1 all_mapped_passed += 1 @@ -914,8 +873,6 @@ del original_bs_reads_2[header] #--------------------------------------- -# output_genome_1 = origin_genome_long_1[0:2] + "_" + origin_genome_1 + "_" + origin_genome_long_1[-2:] -# output_genome_2 = origin_genome_long_2[0:2] + "_" + origin_genome_2 + "_" + origin_genome_long_2[-2:] methy_1=methy_seq(r_aln_1, g_aln_1 + next_1) methy_2=methy_seq(r_aln_2, g_aln_2 + next_2) mC_lst,uC_lst = mcounts(methy_1, mC_lst, uC_lst) @@ -936,11 +893,20 @@ if( (nmCH_2>XS_count) and nmCH_2/float(nCH_2+nmCH_2)>XS_pct ) : XS_2 = 1 + if mapped_strand_1 == '+' : + flag_1 = 67 # 1000011 : 1st read, + strand + flag_2 = 131 # 10000011 : 2nd read, + strand + else : + flag_1 = 115 # 1110011 : 1st read, - strand + flag_2 = 179 # 10110011 : 2nd read, - strand - outfile.store(header, N_mismatch_1, FR, mapped_chr, mapped_strand_1, mapped_location_1, cigar1, original_BS_1, methy_1, XS_1, + outfile.store2( read_id_lst_1[header], flag_1, N_mismatch_1, FR, mapped_chr, mapped_strand_1, mapped_location_1, cigar1, original_BS_1, methy_1, XS_1, output_genome = output_genome_1, rnext = mapped_chr, pnext = mapped_location_2) - outfile.store(header, N_mismatch_2, FR, mapped_chr, mapped_strand_2, mapped_location_2, cigar2, original_BS_2, methy_2, XS_2, + all_base_mapped += len(original_BS_1) + + outfile.store2( read_id_lst_2[header], flag_2, N_mismatch_2, FR, mapped_chr, mapped_strand_2, mapped_location_2, cigar2, original_BS_2, methy_2, XS_2, output_genome = output_genome_2, rnext = mapped_chr, pnext = mapped_location_1) + all_base_mapped += len(original_BS_2) print "--> %s %s (%d/%d) " % (read_file_1, read_file_2, no_my_files, len(my_files)) #---------------------------------------------------------------- @@ -956,45 +922,40 @@ all_unmapped+=len(unmapped_lst) - #================================================================================================== -# outf.close() -# -# outf_u1.close() -# outf_u2.close() delete_files(tmp_path) logm("-------------------------------- " ) - logm("O Number of raw BS-read pairs: %d ( %d bp)"%(all_raw_reads,cut2-cut1+1) ) - logm("O Number of ends trimmed for adapter: %d"% all_trimmed+"\n") + logm("Number of raw BS-read pairs: %d " %(all_raw_reads/2) ) + if adapterA != "" or adapterB != "" : + logm("Number of reads having adapter removed: %d \n"% all_trimmed) + logm("Number of bases after trimming the adapters: %d (%1.3f)" % (all_base_after_trim, float(all_base_after_trim)/all_base_before_trim) ) if all_raw_reads >0: - logm("O Number of unique-hits read pairs for post-filtering: %d" % all_mapped + "\n") + logm("Number of reads rejected because of multiple hits: %d\n" % len(Multiple_hits) ) + logm("Number of unique-hits reads (before post-filtering): %d" % all_mapped + "\n") if asktag=="Y": - logm("O -- %7d FW-RC pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) - logm("O -- %7d RC-FW pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[1]) ) - logm("O -- %7d FW-RC pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[2]) ) - logm("O -- %7d RC-FW pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[3]) ) + logm("-- %7d FW-RC pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) + logm("-- %7d RC-FW pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[1]) ) + logm("-- %7d FW-RC pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[2]) ) + logm("-- %7d RC-FW pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[3]) ) elif asktag=="N": - logm("O -- %7d FW-RC pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) - logm("O -- %7d FW-RC pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[1]) ) - + logm("-- %7d FW-RC pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) + logm("-- %7d FW-RC pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[1]) ) + logm("--- %d uniquely aligned pairs, where each end has mismatches <= %s"%(all_mapped_passed, max_mismatch_no) ) + if asktag=="Y": + logm("----- %7d FW-RC pairs mapped to Watson strand"%(numbers_mapped_lst[0]) ) + logm("----- %7d RC-FW pairs mapped to Watson strand"%(numbers_mapped_lst[1]) ) + logm("----- %7d FW-RC pairs mapped to Crick strand"%(numbers_mapped_lst[2]) ) + logm("----- %7d RC-FW pairs mapped to Crick strand"%(numbers_mapped_lst[3]) ) + elif asktag=="N": + logm("----- %7d FW-RC pairs mapped to Watson strand"%(numbers_mapped_lst[0]) ) + logm("----- %7d FW-RC pairs mapped to Crick strand"%(numbers_mapped_lst[1]) ) - logm("O --- Number of reads rejected because of multiple hits: %d\n" % len(Multiple_hits) ) - logm("O --- %d uniquely aligned pairs, where each end has mismatches <= %s"%(all_mapped_passed, max_mismatch_no) ) - if asktag=="Y": - logm("O ----- %7d FW-RC pairs mapped to Watson strand"%(numbers_mapped_lst[0]) ) - logm("O ----- %7d RC-FW pairs mapped to Watson strand"%(numbers_mapped_lst[1]) ) - logm("O ----- %7d FW-RC pairs mapped to Crick strand"%(numbers_mapped_lst[2]) ) - logm("O ----- %7d RC-FW pairs mapped to Crick strand"%(numbers_mapped_lst[3]) ) - elif asktag=="N": - logm("O ----- %7d FW-RC pairs mapped to Watson strand"%(numbers_mapped_lst[0]) ) - logm("O ----- %7d FW-RC pairs mapped to Crick strand"%(numbers_mapped_lst[1]) ) - - logm("O Mappability= %1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads) ) - logm("O Unmapped read pairs: %d"% all_unmapped+"\n") - + logm("Mappability= %1.4f%%"%(100*float(all_mapped_passed)*2/all_raw_reads) ) + logm("Total bases of uniquely mapped reads %7d"% all_base_mapped ) + logm("Unmapped read pairs: %d"% all_unmapped+"\n") n_CG=mC_lst[0]+uC_lst[0] n_CHG=mC_lst[1]+uC_lst[1]
--- a/BSseeker2/bs_align/bs_rrbs.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/bs_rrbs.py Tue Nov 05 01:55:39 2013 -0500 @@ -42,24 +42,7 @@ # For double enzyme: cut_format="C-CGG,A-CTG"; ApekI:"G^CWGC" #cut_context = re.sub("-", "", cut_format) # Ex. cut_format="C-CGG,AT-CG,G-CWGC" - """ - :param main_read_file: - :param asktag: - :param adapter_file: - :param cut_s: - :param cut_e: - :param no_small_lines: - :param max_mismatch_no: - :param aligner_command: - :param db_path: - :param tmp_path: - :param outfile: - :param XS_pct: - :param XS_count: - :param adapter_mismatch: - :param cut_format: - """ cut_format_lst = EnumerateIUPAC(cut_format.upper().split(",")) # ['G-CAGC', 'AT-CG', 'C-CGG', 'G-CTGC'] cut_context = [i.replace("-","") for i in cut_format_lst] # ['GCAGC', 'ATCG', 'CCGG', 'GCTGC'] cut5_context = [re.match( r'(.*)\-(.*)', i).group(1) for i in cut_format_lst] # ['G', 'AT', 'C', 'G'] @@ -106,18 +89,31 @@ print "[Error] Cannot find adapter file : %s !" % adapter_file exit(-1) - logm("I Read filename: %s" % main_read_file) - logm("I The last cycle (for mapping): %d" % cut_e ) - logm("I Bowtie path: %s" % aligner_command ) - logm("I Reference genome library path: %s" % db_path ) - logm("I Number of mismatches allowed: %s" % max_mismatch_no) - logm("I Adapter seq: %s" % whole_adapter_seq) - logm("----------------------------------------------") + logm("Read filename: %s" % main_read_file) + logm("The first base (for mapping): %d"% cut_s ) + logm("The last base (for mapping): %d"% cut_e ) + + logm("Path for short reads aligner: %s"% aligner_command + '\n') + logm("Reference genome library path: %s"% db_path ) + + if asktag == "Y" : + logm("Un-directional library" ) + else : + logm("Directional library") + + logm("Number of mismatches allowed: %s"% max_mismatch_no ) + + if adapter_file !="": + logm("Adapter seq: %s" % whole_adapter_seq) + logm("-------------------------------- " ) #---------------------------------------------------------------- all_raw_reads=0 all_tagged=0 all_tagged_trimmed=0 + all_base_before_trim=0 + all_base_after_trim=0 + all_base_mapped=0 all_mapped=0 all_mapped_passed=0 n_cut_tag_lst={} @@ -135,6 +131,9 @@ num_mapped_FW_G2A = 0 num_mapped_RC_G2A = 0 + # Count of nucleotides, which should be cut before the adapters + Extra_base_cut_5end_adapter = max([ abs(len(i)-len(j)) for i,j in zip(cut5_context, cut3_context)]) + #=============================================== # directional sequencing #=============================================== @@ -156,72 +155,71 @@ #--- Checking input format ------------------------------------------ try : - read_inf=open(read_file,"r") + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") except IOError: print "[Error] Cannot open input file : %s" % read_file exit(-1) - oneline=read_inf.readline() - l=oneline.split() - n_fastq=0 - n_fasta=0 - input_format="" - if oneline[0]=="@": # FastQ - input_format="fastq" - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="seq" - elif len(l)==11: # Illumina qseq - input_format="qseq" - elif oneline[0]==">": # fasta - input_format="fasta" + oneline = read_inf.readline() + l = oneline.split() + input_format = "" + if oneline[0]=="@": + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">" : + input_format = "seq" + elif len(l)==11: + input_format = "qseq" + elif oneline[0]==">" : + input_format = "fasta" read_inf.close() + #---------------------------------------------------------------- - seq_id="" - seq="" - seq_ready=0 - for line in fileinput.input(read_file): - l=line.split() - + read_id = "" + seq = "" + seq_ready = 0 + line_no = 0 + for line in fileinput.input(read_file, openhook=fileinput.hook_compressed): + l = line.split() + line_no += 1 if input_format=="seq": - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq=l[0] - seq_ready="Y" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq == 1 : + all_raw_reads += 1 + read_id = l[0][1:] + seq_ready = "N" + elif l_fastq == 2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq=l[8] - seq_ready="Y" - elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - all_raw_reads+=1 - seq_id=l[0][1:] - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[8] + seq_ready = "Y" + elif input_format=="fasta" : + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + all_raw_reads += 1 + read_id = l[0][1:] + seq = "" + seq_ready = "N" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" + #--------------------------------------------------------------- if seq_ready=="Y": # Normalize the characters @@ -236,19 +234,22 @@ seq = seq[(cut_s-1):cut_e] # cut_s start from 1 cycle by default #-- Trimming adapter sequence --- + + all_base_before_trim += len(seq) if adapter_seq != "" : - new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch) + new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch, Extra_base_cut_5end_adapter) if len(new_read) < len(seq) : all_tagged_trimmed += 1 seq = new_read + all_base_after_trim += len(seq) if len(seq) <= 4 : seq = "N" * (cut_e - cut_s) # all reads will be considered, regardless of tags #--------- trimmed_raw_BS_read and qscore ------------------ - original_bs_reads[seq_id] = seq + original_bs_reads[read_id] = seq #--------- FW_C2T ------------------ - outf2.write('>%s\n%s\n'%(seq_id, seq.replace('C', 'T'))) + outf2.write('>%s\n%s\n'%(read_id, seq.replace('C', 'T'))) fileinput.close() outf2.close() @@ -384,7 +385,9 @@ N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -402,6 +405,7 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" @@ -453,7 +457,9 @@ N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -471,6 +477,7 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" @@ -507,76 +514,74 @@ #--- Checking input format ------------------------------------------ try : - read_inf=open(read_file,"r") + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") except IOError: print "[Error] Cannot open input file : %s" % read_file exit(-1) - oneline=read_inf.readline() - l=oneline.split() - n_fastq=0 - n_fasta=0 - input_format="" - if oneline[0]=="@": # FastQ - input_format="fastq" - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="seq" - elif len(l)==11: # Illumina qseq - input_format="qseq" - elif oneline[0]==">": # fasta - input_format="fasta" + oneline = read_inf.readline() + l = oneline.split() + input_format = "" + if oneline[0]=="@": + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">" : + input_format = "seq" + elif len(l)==11: + input_format = "qseq" + elif oneline[0]==">" : + input_format = "fasta" read_inf.close() #---------------------------------------------------------------- - seq_id = "" + read_id = "" seq = "" - seq_ready=0 - for line in fileinput.input(read_file): - l=line.split() - - if input_format == "seq": - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq=l[0] - seq_ready="Y" + seq_ready = 0 + line_no = 0 + for line in fileinput.input(read_file, openhook=fileinput.hook_compressed): + l = line.split() + line_no += 1 + if input_format=="seq": + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq == 1 : + all_raw_reads += 1 + read_id = l[0][1:] + seq_ready = "N" + elif l_fastq == 2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq=l[8] - seq_ready="Y" - elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - all_raw_reads+=1 - seq_id=l[0][1:] - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" - #--------------------------------------------------------------- + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[8] + seq_ready = "Y" + elif input_format=="fasta" : + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + all_raw_reads += 1 + read_id = l[0][1:] + seq = "" + seq_ready = "N" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" + + #--------------------------------------------------------------- if seq_ready=="Y": # Normalize the characters - seq=seq.upper().replace(".","N") + seq = seq.upper().replace(".","N") read_tag = [ m for m,n in [ (i, len(i)) for i in uniq(cut3_tag_lst)] if seq[0:n] == m ] if len(read_tag) > 0 : @@ -587,21 +592,23 @@ seq = seq[(cut_s-1):cut_e] # cut_s start from 1 cycle by default #-- Trimming adapter sequence --- + all_base_before_trim += len(seq) if adapter_seq != "" : - new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch) + new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch, Extra_base_cut_5end_adapter) if len(new_read) < len(seq) : all_tagged_trimmed += 1 seq = new_read + all_base_after_trim += len(seq) if len(seq) <= 4 : seq = "N" * (cut_e - cut_s) # all reads will be considered, regardless of tags #--------- trimmed_raw_BS_read and qscore ------------------ - original_bs_reads[seq_id] = seq + original_bs_reads[read_id] = seq #--------- FW_C2T ------------------ - outf2.write('>%s\n%s\n'%(seq_id, seq.replace('C', 'T'))) + outf2.write('>%s\n%s\n'%(read_id, seq.replace('C', 'T'))) #--------- RC_G2A ------------------ - outf3.write('>%s\n%s\n' % (seq_id, seq.replace("G","A"))) + outf3.write('>%s\n%s\n' % (read_id, seq.replace("G","A"))) fileinput.close() outf2.close() @@ -612,10 +619,10 @@ # mapping #-------------------------------------------------------------------------------- - WC2T=tmp_d("W_C2T_m"+max_mismatch_no+".mapping"+random_id) - CC2T=tmp_d("C_C2T_m"+max_mismatch_no+".mapping"+random_id) - WG2A=tmp_d("W_G2A_m"+max_mismatch_no+".mapping"+random_id) - CG2A=tmp_d("C_G2A_m"+max_mismatch_no+".mapping"+random_id) + WC2T = tmp_d("W_C2T_m"+max_mismatch_no+".mapping"+random_id) + CC2T = tmp_d("C_C2T_m"+max_mismatch_no+".mapping"+random_id) + WG2A = tmp_d("W_G2A_m"+max_mismatch_no+".mapping"+random_id) + CG2A = tmp_d("C_G2A_m"+max_mismatch_no+".mapping"+random_id) run_in_parallel([ aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'), 'input_file' : outfile2, @@ -638,37 +645,37 @@ # Post processing #-------------------------------------------------------------------------------- - FW_C2T_U,FW_C2T_R=extract_mapping(WC2T) - RC_G2A_U,RC_G2A_R=extract_mapping(CG2A) + FW_C2T_U,FW_C2T_R = extract_mapping(WC2T) + RC_G2A_U,RC_G2A_R = extract_mapping(CG2A) - FW_G2A_U,FW_G2A_R=extract_mapping(WG2A) - RC_C2T_U,RC_C2T_R=extract_mapping(CC2T) + FW_G2A_U,FW_G2A_R = extract_mapping(WG2A) + RC_C2T_U,RC_C2T_R = extract_mapping(CC2T) logm("Extracting alignments is done") #---------------------------------------------------------------- # get unique-hit reads #---------------------------------------------------------------- - Union_set=set(FW_C2T_U.iterkeys()) | set(RC_G2A_U.iterkeys()) | set(FW_G2A_U.iterkeys()) | set(RC_C2T_U.iterkeys()) + Union_set = set(FW_C2T_U.iterkeys()) | set(RC_G2A_U.iterkeys()) | set(FW_G2A_U.iterkeys()) | set(RC_C2T_U.iterkeys()) - Unique_FW_C2T=set() # + - Unique_RC_G2A=set() # + - Unique_FW_G2A=set() # - - Unique_RC_C2T=set() # - - Multiple_hits=set() + Unique_FW_C2T = set() # + + Unique_RC_G2A = set() # + + Unique_FW_G2A = set() # - + Unique_RC_C2T = set() # - + Multiple_hits = set() - for x in Union_set: - _list=[] + for x in Union_set : + _list = [] for dx in [FW_C2T_U, RC_G2A_U, FW_G2A_U, RC_C2T_U]: - mis_lst=dx.get(x,[99]) - mis=int(mis_lst[0]) + mis_lst = dx.get(x,[99]) + mis = int(mis_lst[0]) _list.append(mis) for dx in [FW_C2T_R, RC_G2A_R, FW_G2A_R, RC_C2T_R]: - mis=dx.get(x,99) + mis = dx.get(x,99) _list.append(mis) - mini=min(_list) + mini = min(_list) if _list.count(mini) == 1: - mini_index=_list.index(mini) + mini_index = _list.index(mini) if mini_index == 0: Unique_FW_C2T.add(x) elif mini_index == 1: @@ -683,7 +690,7 @@ Multiple_hits.add(x) # write reads rejected by Multiple Hits to file if show_multiple_hit : - outf_MH=open("Multiple_hit.fa",'w') + outf_MH = open("Multiple_hit.fa",'w') for i in Multiple_hits : outf_MH.write(">%s\n" % i) outf_MH.write("%s\n" % original_bs_reads[i]) @@ -695,18 +702,18 @@ del RC_C2T_R del RC_G2A_R - FW_C2T_uniq_lst=[[FW_C2T_U[u][1],u] for u in Unique_FW_C2T] - FW_G2A_uniq_lst=[[FW_G2A_U[u][1],u] for u in Unique_FW_G2A] - RC_C2T_uniq_lst=[[RC_C2T_U[u][1],u] for u in Unique_RC_C2T] - RC_G2A_uniq_lst=[[RC_G2A_U[u][1],u] for u in Unique_RC_G2A] + FW_C2T_uniq_lst = [[FW_C2T_U[u][1],u] for u in Unique_FW_C2T] + FW_G2A_uniq_lst = [[FW_G2A_U[u][1],u] for u in Unique_FW_G2A] + RC_C2T_uniq_lst = [[RC_C2T_U[u][1],u] for u in Unique_RC_C2T] + RC_G2A_uniq_lst = [[RC_G2A_U[u][1],u] for u in Unique_RC_G2A] FW_C2T_uniq_lst.sort() RC_C2T_uniq_lst.sort() FW_G2A_uniq_lst.sort() RC_G2A_uniq_lst.sort() - FW_C2T_uniq_lst=[x[1] for x in FW_C2T_uniq_lst] - RC_C2T_uniq_lst=[x[1] for x in RC_C2T_uniq_lst] - FW_G2A_uniq_lst=[x[1] for x in FW_G2A_uniq_lst] - RC_G2A_uniq_lst=[x[1] for x in RC_G2A_uniq_lst] + FW_C2T_uniq_lst = [x[1] for x in FW_C2T_uniq_lst] + RC_C2T_uniq_lst = [x[1] for x in RC_C2T_uniq_lst] + FW_G2A_uniq_lst = [x[1] for x in FW_G2A_uniq_lst] + RC_G2A_uniq_lst = [x[1] for x in RC_G2A_uniq_lst] del Unique_FW_C2T del Unique_FW_G2A @@ -716,7 +723,7 @@ #---------------------------------------------------------------- # Post-filtering reads - # ---- FW_C2T ---- undirectional + # ---- FW_C2T ---- un-directional FW_regions = dict() gseq = dict() chr_length = dict() @@ -758,7 +765,9 @@ try_count += 1 N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -776,11 +785,12 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" - # ---- RC_C2T ---- undirectional + # ---- RC_C2T ---- un-directional RC_regions = dict() for header in RC_C2T_uniq_lst : _, mapped_chr, mapped_location, cigar = RC_C2T_U[header] @@ -821,7 +831,9 @@ try_count += 1 N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -839,12 +851,13 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" - # ---- FW_G2A ---- undirectional + # ---- FW_G2A ---- un-directional FW_regions = dict() gseq = dict() chr_length = dict() @@ -891,9 +904,10 @@ # print "[For debug]: FW_G2A read still can not find fragment serial" # Tip: sometimes "my_region_serial" is still 0 ... - N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + max_mismatch_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -911,11 +925,12 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" - # ---- RC_G2A ---- undirectional + # ---- RC_G2A ---- un-directional RC_regions = dict() for header in RC_G2A_uniq_lst : _, mapped_chr, mapped_location, cigar = RC_G2A_U[header] @@ -961,9 +976,10 @@ # print "[For debug]: chr=", mapped_chr # print "[For debug]: RC_C2A read still cannot find fragment serial" - N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -981,37 +997,38 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" - - # Finished both FW and RC logm("Done: %s (%d) \n" % (read_file, no_my_files)) print "--> %s (%d) "%(read_file, no_my_files) del original_bs_reads delete_files(WC2T, CC2T, WG2A, CG2A) - - - # End of un-directional library delete_files(tmp_path) - logm("O Number of raw reads: %d "% all_raw_reads) - if all_raw_reads >0: - logm("O Number of CGG/TGG tagged reads: %d (%1.3f)"%(all_tagged,float(all_tagged)/all_raw_reads)) + logm("Number of raw reads: %d "% all_raw_reads) + if all_raw_reads>0: + logm("Number of raw reads with CGG/TGG at 5' end: %d (%1.3f)"%(all_tagged,float(all_tagged)/all_raw_reads)) for kk in range(len(n_cut_tag_lst)): - logm("O Number of raw reads with %s tag: %d (%1.3f)"%(cut3_tag_lst[kk],n_cut_tag_lst[cut3_tag_lst[kk]],float(n_cut_tag_lst[cut3_tag_lst[kk]])/all_raw_reads)) - logm("O Number of CGG/TGG reads having adapter removed: %d "%all_tagged_trimmed) - logm("O Number of reads rejected because of multiple hits: %d\n" % len(Multiple_hits) ) - logm("O Number of unique-hits reads for post-filtering: %d"%all_mapped) + logm("Number of raw reads with tag %s: %d (%1.3f)"%(cut3_tag_lst[kk],n_cut_tag_lst[cut3_tag_lst[kk]],float(n_cut_tag_lst[cut3_tag_lst[kk]])/all_raw_reads)) + if adapter_seq!="" : + logm("Number of reads having adapter removed: %d "%all_tagged_trimmed) + logm("Number of bases in total: %d "%all_base_before_trim) + if adapter_seq!="" : + logm("Number of bases after trimming the adapters: %d (%1.3f)"%(all_base_after_trim, float(all_base_after_trim)/all_base_before_trim ) ) + logm("Number of reads are rejected because of multiple hits: %d\n" % len(Multiple_hits) ) + logm("Number of unique-hits reads (before post-filtering): %d"%all_mapped) - logm("O ------ %d uniquely aligned reads, passed fragment check, with mismatches <= %s"%(all_mapped_passed, max_mismatch_no)) - logm("O Mappability= %1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads)) + logm("------ %d uniquely aligned reads, passed fragment check, with mismatches <= %s"%(all_mapped_passed, max_mismatch_no)) + logm("Mappability= %1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads)) + logm("Total bases of uniquely mapped reads %7d"% all_base_mapped ) - if asktag=="Y": # undiretional + if asktag=="Y": # un-diretional logm(" ---- %7d FW reads mapped to Watson strand"%(num_mapped_FW_C2T) ) logm(" ---- %7d RC reads mapped to Watson strand"%(num_mapped_FW_G2A) ) logm(" ---- %7d FW reads mapped to Crick strand"%(num_mapped_RC_C2T) ) @@ -1021,16 +1038,17 @@ elif asktag=="N": # directional logm(" ---- %7d FW reads mapped to Watson strand"%(num_mapped_FW_C2T) ) logm(" ---- %7d FW reads mapped to Crick strand"%(num_mapped_RC_C2T) ) + logm("Total bases of uniquely mapped reads %7d"% all_base_mapped ) - n_CG=mC_lst[0]+uC_lst[0] - n_CHG=mC_lst[1]+uC_lst[1] - n_CHH=mC_lst[2]+uC_lst[2] + n_CG = mC_lst[0] + uC_lst[0] + n_CHG = mC_lst[1] + uC_lst[1] + n_CHH = mC_lst[2] + uC_lst[2] logm("----------------------------------------------") - logm("M Methylated C in mapped reads ") - logm("M mCG %1.3f%%"%((100*float(mC_lst[0])/n_CG) if n_CG != 0 else 0)) - logm("M mCHG %1.3f%%"%((100*float(mC_lst[1])/n_CHG) if n_CHG != 0 else 0)) - logm("M mCHH %1.3f%%"%((100*float(mC_lst[2])/n_CHH) if n_CHH != 0 else 0)) + logm("Methylated C in mapped reads ") + logm(" mCG %1.3f%%"%( (100 * float(mC_lst[0]) / n_CG) if n_CG!=0 else 0)) + logm(" mCHG %1.3f%%"%( (100 * float(mC_lst[1]) / n_CHG) if n_CHG!=0 else 0)) + logm(" mCHH %1.3f%%"%( (100 * float(mC_lst[2]) / n_CHH) if n_CHH!=0 else 0)) logm("----------------------------------------------") logm("------------------- END ----------------------")
--- a/BSseeker2/bs_align/bs_single_end.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/bs_single_end.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,6 +1,7 @@ import fileinput, os, time, random, math from bs_utils.utils import * from bs_align_utils import * +import gzip #---------------------------------------------------------------- # Read from the mapped results, return lists of unique / multiple-hit reads @@ -77,20 +78,23 @@ #---------------------------------------------------------------- - logm("Read filename: %s"% main_read_file ) - logm("Un-directional library: %s" % asktag ) - logm("The first base (for mapping): %d" % cut1) - logm("The last base (for mapping): %d" % cut2) - logm("Max. lines per mapping: %d"% no_small_lines) - logm("Aligner: %s" % aligner_command) - logm("Reference genome library path: %s" % db_path ) - logm("Number of mismatches allowed: %s" % max_mismatch_no ) + logm("Read filename: %s" % main_read_file) + logm("The first base (for mapping): %d"% cut1 ) + logm("The last base (for mapping): %d"% cut2 ) + + logm("Path for short reads aligner: %s"% aligner_command + '\n') + logm("Reference genome library path: %s"% db_path ) + + if asktag == "Y" : + logm("Un-directional library" ) + else : + logm("Directional library") + + logm("Number of mismatches allowed: %s"% max_mismatch_no ) + if adapter_file !="": - if asktag=="N": - logm("Adapter to be removed from 3' reads: %s"%(adapter.rstrip("\n"))) - elif asktag=="Y": - logm("Adapter to be removed from 3' FW reads: %s"%(adapter_fw.rstrip("\n")) ) - logm("Adapter to be removed from 3' RC reads: %s"%(adapter_rc.rstrip("\n")) ) + logm("Adapter seq: %s" % adapter_fw) + logm("-------------------------------- " ) #---------------------------------------------------------------- # helper method to join fname with tmp_path @@ -109,9 +113,12 @@ #---- Stats ------------------------------------------------------------ all_raw_reads=0 - all_trimed=0 + all_trimmed=0 all_mapped=0 all_mapped_passed=0 + all_base_before_trim=0 + all_base_after_trim=0 + all_base_mapped=0 numbers_premapped_lst=[0,0,0,0] numbers_mapped_lst=[0,0,0,0] @@ -119,7 +126,6 @@ mC_lst=[0,0,0] uC_lst=[0,0,0] - no_my_files=0 #---------------------------------------------------------------- @@ -132,7 +138,7 @@ random_id = ".tmp-"+str(random.randint(1000000,9999999)) #------------------------------------------------------------------- - # undirectional sequencing + # un-directional sequencing #------------------------------------------------------------------- if asktag=="Y": @@ -146,74 +152,70 @@ #---------------------------------------------------------------- # detect format of input file try : - read_inf=open(read_file,"r") + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") except IOError : print "[Error] Cannot open input file : %s" % read_file exit(-1) - oneline=read_inf.readline() - l=oneline.split() - input_format="" - if oneline[0]=="@": # fastq - input_format="fastq" - n_fastq=0 - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="seq" - elif len(l)==11: # qseq - input_format="qseq" - elif oneline[0]==">": # fasta - input_format="fasta" - n_fasta=0 + oneline = read_inf.readline() + l = oneline.split() + input_format = "" + if oneline[0]=="@": + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">": + input_format = "seq" + elif len(l)==11: + input_format = "qseq" + elif oneline[0]==">": + input_format = "fasta" read_inf.close() #---------------------------------------------------------------- - # read sequence, remove adapter and convert - read_id="" - seq="" - seq_ready="N" - for line in fileinput.input(read_file): - l=line.split() - + # read sequence, remove adapter and convert + read_id = "" + seq = "" + seq_ready = "N" + line_no = 0 + for line in fileinput.input(read_file, openhook=fileinput.hook_compressed): # allow input with .gz + l = line.split() + line_no += 1 if input_format=="seq": - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq=l[0] - seq_ready="Y" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq == 1 : + all_raw_reads += 1 + read_id = l[0][1:] + seq_ready = "N" + elif l_fastq == 2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq=l[8] - seq_ready="Y" - elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - all_raw_reads+=1 - #read_id=str(all_raw_reads) - read_id=l[0][1:] - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[8] + seq_ready = "Y" + elif input_format=="fasta" : + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + all_raw_reads += 1 + read_id = l[0][1:] + seq = "" + seq_ready = "N" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" #---------------------------------------------------------------- if seq_ready=="Y": @@ -222,13 +224,14 @@ seq=seq.replace(".","N") # striping BS adapter from 3' read + all_base_before_trim += len(seq) if (adapter_fw !="") and (adapter_rc !="") : new_read = RemoveAdapter(seq, adapter_fw, adapter_mismatch) new_read = Remove_5end_Adapter(new_read, adapter_rc) if len(new_read) < len(seq) : - all_trimed += 1 + all_trimmed += 1 seq = new_read - + all_base_after_trim += len(seq) if len(seq)<=4: seq=''.join(["N" for x in xrange(cut2-cut1+1)]) @@ -353,18 +356,18 @@ RC_C2T_uniq_lst=[x[1] for x in RC_C2T_uniq_lst] FW_G2A_uniq_lst=[x[1] for x in FW_G2A_uniq_lst] RC_G2A_uniq_lst=[x[1] for x in RC_G2A_uniq_lst] + #---------------------------------------------------------------- + + numbers_premapped_lst[0] += len(Unique_FW_C2T) + numbers_premapped_lst[1] += len(Unique_RC_G2A) + numbers_premapped_lst[2] += len(Unique_FW_G2A) + numbers_premapped_lst[3] += len(Unique_RC_C2T) del Unique_FW_C2T del Unique_FW_G2A del Unique_RC_C2T del Unique_RC_G2A - #---------------------------------------------------------------- - numbers_premapped_lst[0] += len(Unique_FW_C2T) - numbers_premapped_lst[1] += len(Unique_RC_G2A) - numbers_premapped_lst[2] += len(Unique_FW_G2A) - numbers_premapped_lst[3] += len(Unique_RC_C2T) - #---------------------------------------------------------------- @@ -376,7 +379,6 @@ (FW_G2A_uniq_lst,FW_G2A_U), (RC_C2T_uniq_lst,RC_C2T_U)]: nn += 1 - mapped_chr0 = "" for header in ali_unique_lst: @@ -422,7 +424,9 @@ if len(r_aln)==len(g_aln): N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no): +# if N_mismatch <= int(max_mismatch_no): + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): numbers_mapped_lst[nn-1] += 1 all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next) @@ -436,6 +440,7 @@ XS = 1 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand, mapped_location, cigar, original_BS, methy, XS, output_genome = output_genome) + all_base_mapped += len(original_BS) #---------------------------------------------------------------- logm("--> %s (%d) "%(read_file, no_my_files)) @@ -449,78 +454,74 @@ if asktag=="N": #---------------------------------------------------------------- - outfile2=tmp_d('Trimed_C2T.fa'+random_id) + outfile2=tmp_d('Trimmed_C2T.fa'+random_id) outf2=open(outfile2,'w') - - n=0 #---------------------------------------------------------------- try : - read_inf=open(read_file,"r") + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") except IOError : print "[Error] Cannot open input file : %s" % read_file exit(-1) - oneline=read_inf.readline() - l=oneline.split() - input_format="" - if oneline[0]=="@": # FastQ - input_format="fastq" - n_fastq=0 - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="seq" - elif len(l)==11: # Illumina GAII qseq file - input_format="qseq" - elif oneline[0]==">": # fasta - input_format="fasta" - n_fasta=0 + oneline = read_inf.readline() + l = oneline.split() + input_format = "" + if oneline[0]=="@": + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">": + input_format = "seq" + elif len(l)==11: + input_format = "qseq" + elif oneline[0]==">": + input_format = "fasta" read_inf.close() + #print "detected data format: %s"%(input_format) #---------------------------------------------------------------- read_id="" seq="" seq_ready="N" - for line in fileinput.input(read_file): - l=line.split() + line_no = 0 + for line in fileinput.input(read_file, openhook=fileinput.hook_compressed): + l = line.split() + line_no += 1 if input_format=="seq": - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq=l[0] - seq_ready="Y" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq == 1 : + all_raw_reads += 1 + read_id = l[0][1:] + seq_ready = "N" + elif l_fastq == 2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq=l[8] - seq_ready="Y" - elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - all_raw_reads+=1 - read_id=l[0][1:] - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" - + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[8] + seq_ready = "Y" + elif input_format=="fasta" : + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + all_raw_reads += 1 + read_id = l[0][1:] + seq = "" + seq_ready = "N" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" #-------------------------------- if seq_ready=="Y": seq=seq[cut1-1:cut2] #<---selecting 0..52 from 1..72 -e 52 @@ -528,12 +529,13 @@ seq=seq.replace(".","N") #--striping adapter from 3' read ------- + all_base_before_trim += len(seq) if adapter != "": new_read = RemoveAdapter(seq, adapter, adapter_mismatch) if len(new_read) < len(seq) : - all_trimed += 1 + all_trimmed += 1 seq = new_read - + all_base_after_trim += len(seq) if len(seq)<=4: seq = "N" * (cut2-cut1+1) @@ -612,7 +614,6 @@ outf_MH.close() - FW_C2T_uniq_lst=[[FW_C2T_U[u][1],u] for u in Unique_FW_C2T] RC_C2T_uniq_lst=[[RC_C2T_U[u][1],u] for u in Unique_RC_C2T] FW_C2T_uniq_lst.sort() @@ -633,7 +634,6 @@ chr_length = dict() for ali_unique_lst, ali_dic in [(FW_C2T_uniq_lst,FW_C2T_U),(RC_C2T_uniq_lst,RC_C2T_U)]: nn += 1 - mapped_chr0 = "" for header in ali_unique_lst: _, mapped_chr, mapped_location, cigar = ali_dic[header] original_BS = original_bs_reads[header] @@ -641,11 +641,6 @@ if mapped_chr not in gseq : gseq[mapped_chr] = deserialize(db_d(mapped_chr)) chr_length[mapped_chr] = len(gseq[mapped_chr]) - #if mapped_chr != mapped_chr0: - # my_gseq = deserialize(db_d(mapped_chr)) - # chr_length = len(my_gseq) - # mapped_chr0 = mapped_chr - #------------------------------------- r_start, r_end, g_len = get_read_start_end_and_genome_length(cigar) @@ -664,7 +659,8 @@ if len(r_aln) == len(g_aln): N_mismatch = N_MIS(r_aln, g_aln) #+ original_BS_length - (r_end - r_start) # mismatches in the alignment + soft clipped nucleotides - if N_mismatch <= int(max_mismatch_no): + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): numbers_mapped_lst[nn-1] += 1 all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln+next) @@ -678,6 +674,7 @@ XS = 1 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand, mapped_location, cigar, original_BS, methy, XS, output_genome = output_genome) + all_base_mapped += len(original_BS) #---------------------------------------------------------------- logm("--> %s (%d) "%(read_file,no_my_files)) @@ -686,14 +683,16 @@ #---------------------------------------------------------------- -# outf.close() delete_files(tmp_path) logm("Number of raw reads: %d \n"% all_raw_reads) if all_raw_reads >0: - logm("Number of reads having adapter removed: %d \n" % all_trimed ) - logm("Number of reads rejected because of multiple hits: %d\n" % len(Multiple_hits) ) - logm("Number of unique-hits reads for post-filtering: %d\n" % all_mapped) + logm("Number of bases in total: %d "%all_base_before_trim) + if adapter != "" : + logm("Number of reads having adapter removed: %d \n" % all_trimmed ) + logm("Number of bases after trimming the adapters: %d (%1.3f)"%(all_base_after_trim, float(all_base_after_trim)/all_base_before_trim) ) + logm("Number of reads are rejected because of multiple hits: %d\n" % len(Multiple_hits) ) + logm("Number of unique-hits reads (before post-filtering): %d\n" % all_mapped) if asktag=="Y": logm(" ---- %7d FW reads mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) logm(" ---- %7d RC reads mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[1]) ) @@ -713,6 +712,8 @@ logm(" ---- %7d FW reads mapped to Watson strand"%(numbers_mapped_lst[0]) ) logm(" ---- %7d FW reads mapped to Crick strand"%(numbers_mapped_lst[1]) ) logm("Mappability= %1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads) ) + logm("Total bases of uniquely mapped reads %7d"% all_base_mapped ) + n_CG=mC_lst[0]+uC_lst[0] n_CHG=mC_lst[1]+uC_lst[1]
--- a/BSseeker2/bs_align/output.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/output.py Tue Nov 05 01:55:39 2013 -0500 @@ -81,3 +81,53 @@ ('XG', output_genome)) self.f.write(a) + + + def store2(self, qname, flag, N_mismatch, FR, refname, strand, pos, cigar, original_BS, methy, STEVE, rnext = -1, pnext = -1, qual = None, output_genome = None, + rrbs = False, my_region_serial = -1, my_region_start = 0, my_region_end = 0): + + if self.format == BS_SEEKER1: + + # remove the soft clipped bases from the read + # this is done for backwards compatibility with the old format + r_start, r_end, _ = get_read_start_end_and_genome_length(cigar) + original_BS = original_BS[r_start : r_end] + + if rrbs: + self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, STEVE)) + else: + self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, my_region_serial, my_region_start, my_region_end, STEVE)) + + + elif self.format == BAM or self.format == SAM: + + a = pysam.AlignedRead() + a.qname = qname + a.seq = original_BS if strand == '+' else reverse_compl_seq(original_BS) + a.flag = flag + a.tid = self.chrom_ids[refname] + a.pos = pos + a.mapq = 255 + a.cigar = cigar if strand == '+' else list(reversed(cigar)) + a.rnext = rnext if rnext == -1 else self.chrom_ids[rnext] + a.pnext = pnext + a.qual= qual + if rrbs: + a.tags = (('XO', FR), + ('XS', STEVE), + ('NM', N_mismatch), + ('XM', methy), + ('XG', output_genome), + ('YR', my_region_serial), + ('YS', my_region_start), + ('YE', my_region_end) + ) + + else: + a.tags = (('XO', FR), + ('XS', STEVE), + ('NM', N_mismatch), + ('XM', methy), + ('XG', output_genome)) + + self.f.write(a)
--- a/BSseeker2/bs_index/wg_build.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_index/wg_build.py Tue Nov 05 01:55:39 2013 -0500 @@ -3,8 +3,8 @@ def wg_build(fasta_file, build_command, ref_path, aligner): - # ref_path is a string that containts the directory where the reference genomes are stored with - # the input fasta filename appended + # ref_path is a string that contains the directory where the reference genomes are stored with + # the input Fasta filename appended ref_path = os.path.join(ref_path, os.path.split(fasta_file)[1] + '_'+aligner)
--- a/BSseeker2/bs_seeker2-align.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_seeker2-align.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from optparse import OptionParser, OptionGroup import re @@ -7,44 +7,47 @@ from bs_align.bs_pair_end import * from bs_align.bs_single_end import * from bs_align.bs_rrbs import * -from bs_utils.utils import * +#import re +#from bs_utils.utils import * if __name__ == '__main__': - parser = OptionParser() + parser = OptionParser(usage="Usage: %prog {-i <single> | -1 <mate1> -2 <mate2>} -g <genome.fa> [options]") # option group 1 opt_group = OptionGroup(parser, "For single end reads") - opt_group.add_option("-i", "--input", type="string", dest="infilename",help="Input your read file name (FORMAT: sequences, fastq, qseq,fasta)", metavar="INFILE") + opt_group.add_option("-i", "--input", type="string", dest="infilename",help="Input read file (FORMAT: sequences, qseq, fasta, fastq). Ex: read.fa or read.fa.gz", metavar="INFILE") parser.add_option_group(opt_group) # option group 2 opt_group = OptionGroup(parser, "For pair end reads") - opt_group.add_option("-1", "--input_1", type="string", dest="infilename_1",help="Input your read file end 1 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") - opt_group.add_option("-2", "--input_2", type="string", dest="infilename_2",help="Input your read file end 2 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") - opt_group.add_option("--minins",type = "int",dest = "min_insert_size", help="The minimum insert size for valid paired-end alignments [Default: %default]", default = -1) - opt_group.add_option("--maxins",type = "int",dest = "max_insert_size", help="The maximum insert size for valid paired-end alignments [Default: %default]", default = 400) + opt_group.add_option("-1", "--input_1", type="string", dest="infilename_1",help="Input read file, mate 1 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") + opt_group.add_option("-2", "--input_2", type="string", dest="infilename_2",help="Input read file, mate 2 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") + opt_group.add_option("-I", "--minins",type = "int",dest = "min_insert_size", help="The minimum insert size for valid paired-end alignments [Default: %default]", default = 0) + opt_group.add_option("-X", "--maxins",type = "int",dest = "max_insert_size", help="The maximum insert size for valid paired-end alignments [Default: %default]", default = 500) parser.add_option_group(opt_group) # option group 3 opt_group = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options") - opt_group.add_option("-r", "--rrbs", action="store_true", dest="rrbs", default = False, help = 'Process reads from Reduced Representation Bisulfite Sequencing experiments') + opt_group.add_option("-r", "--rrbs", action="store_true", dest="rrbs", default = False, help = 'Map reads to the Reduced Representation genome') opt_group.add_option("-c", "--cut-site", type="string",dest="cut_format", help="Cutting sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", metavar="pattern", default = "C-CGG") - opt_group.add_option("-L", "--low", type = "int", dest="rrbs_low_bound", help="lower bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 40) - opt_group.add_option("-U", "--up", type = "int", dest="rrbs_up_bound", help="upper bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 500) + opt_group.add_option("-L", "--low", type = "int", dest="rrbs_low_bound", help="Lower bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 20) + opt_group.add_option("-U", "--up", type = "int", dest="rrbs_up_bound", help="Upper bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 500) parser.add_option_group(opt_group) # option group 4 opt_group = OptionGroup(parser, "General options") opt_group.add_option("-t", "--tag", type="string", dest="taginfo",help="[Y]es for undirectional lib, [N]o for directional [Default: %default]", metavar="TAG", default = 'N') - opt_group.add_option("-s","--start_base",type = "int",dest = "cutnumber1", help="The first base of your read to be mapped [Default: %default]", default = 1) - opt_group.add_option("-e","--end_base",type = "int",dest = "cutnumber2", help="The last cycle number of your read to be mapped [Default: %default]", default = 200) - opt_group.add_option("-a", "--adapter", type="string", dest="adapter_file",help="Input text file of your adaptor sequences (to be trimed from the 3'end of the reads). Input 1 seq for dir. lib., 2 seqs for undir. lib. One line per sequence", metavar="FILE", default = '') - opt_group.add_option("--am",type = "int",dest = "adapter_mismatch", help="Number of mismatches allowed in adaptor [Default: %default]", default = 0) - opt_group.add_option("-g", "--genome", type="string", dest="genome",help="Name of the reference genome (the same as the reference genome file in the preprocessing step) [ex. chr21_hg18.fa]") - opt_group.add_option("-m", "--mismatches",type = "int", dest="int_no_mismatches",help="Number of mismatches in one read [Default: %default]", default = 4) - opt_group.add_option("--aligner", dest="aligner",help="Aligner program to perform the analisys: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE2) - opt_group.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Defaults: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), + opt_group.add_option("-s","--start_base",type = "int",dest = "cutnumber1", help="The first cycle of the read to be mapped [Default: %default]", default = 1) + opt_group.add_option("-e","--end_base",type = "int",dest = "cutnumber2", help="The last cycle of the read to be mapped [Default: %default]", default = 200) + opt_group.add_option("-a", "--adapter", type="string", dest="adapter_file",help="Input text file of your adaptor sequences (to be trimmed from the 3'end of the reads, ). " + "Input one seq for dir. lib., twon seqs for undir. lib. One line per sequence. " + "Only the first 10bp will be used", metavar="FILE", default = '') + opt_group.add_option("--am",type = "int",dest = "adapter_mismatch", help="Number of mismatches allowed in adapter [Default: %default]", default = 0) + opt_group.add_option("-g", "--genome", type="string", dest="genome",help="Name of the reference genome (should be the same as \"-f\" in bs_seeker2-build.py ) [ex. chr21_hg18.fa]") + opt_group.add_option("-m", "--mismatches",type = "float", dest="no_mismatches",help="Number of mismatches in one read [Default: %default]", default = 4) + opt_group.add_option("--aligner", dest="aligner",help="Aligner program for short reads mapping: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE) + opt_group.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Detected: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), metavar="PATH" ) opt_group.add_option("-d", "--db", type="string", dest="dbpath",help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]" , metavar="DBPATH", default = reference_genome_path) @@ -52,7 +55,7 @@ opt_group.add_option("-o", "--output", type="string", dest="outfilename",help="The name of output file [INFILE.bs(se|pe|rrbs)]", metavar="OUTFILE") opt_group.add_option("-f", "--output-format", type="string", dest="output_format",help="Output format: "+', '.join(output.formats)+" [Default: %default]", metavar="FORMAT", default = output.BAM) opt_group.add_option("--no-header", action="store_true", dest="no_SAM_header",help="Suppress SAM header lines [Default: %default]", default = False) - opt_group.add_option("--temp_dir", type="string", dest="temp_dir",help="The path to your temporary directory [Default: %default]", metavar="PATH", default = tempfile.gettempdir()) + opt_group.add_option("--temp_dir", type="string", dest="temp_dir",help="The path to your temporary directory [Detected: %default]", metavar="PATH", default = tempfile.gettempdir()) opt_group.add_option("--XS",type = "string", dest="XS_filter",help="Filter definition for tag XS, format X,Y. X=0.8 and y=5 indicate that for one read, if #(mCH sites)/#(all CH sites)>0.8 and #(mCH sites)>5, then tag XS=1; or else tag XS=0. [Default: %default]", default = "0.5,5") # added by weilong opt_group.add_option("--multiple-hit", action="store_true", dest="Output_multiple_hit", default = False, help = 'Output reads with multiple hits to file\"Multiple_hit.fa\"') @@ -96,7 +99,7 @@ # if no options were given by the user, print help and exit if len(sys.argv) == 1: - print parser.print_help() + parser.print_help() exit(0) if options.version : @@ -112,6 +115,38 @@ if not (options.infilename or (options.infilename_1 and options.infilename_2)): error('You should set either -i or -1 and -2 options.') + + # Calculate the length of read + if options.infilename : + read_file = options.infilename + elif options.infilename_1 : + read_file = options.infilename_1 + else : + error('You should at least specify -i or -1 options.') + + try : + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") + except IOError : + print "[Error] Cannot open input file : %s" % read_file + exit(-1) + oneline = read_inf.readline() + oneline = read_inf.readline() # get the second line + read_len = min(len(oneline), (options.cutnumber2-options.cutnumber1)) + read_inf.close() + # mismatch allowed: bowtie 1,build-in parameter '-m'; bowtie 2, post-filter paramter + # mismatch should no greater than the read length + no_mismatches = float(options.no_mismatches) + if (no_mismatches < 1) : + int_no_mismatches=int(no_mismatches * read_len) + else : + int_no_mismatches=int(no_mismatches) + + str_no_mismatches=str(options.no_mismatches) # pass to specific mode + + # -t, directional / un-directional library asktag=str(options.taginfo).upper() if asktag not in 'YN': @@ -121,10 +156,8 @@ error('-a option should be: %s' % ' ,'.join(supported_aligners)+'.') # path for aligner aligner_exec = os.path.expanduser( os.path.join(options.aligner_path or aligner_path[options.aligner], options.aligner) ) - # mismatch allowed: bowtie 1,build-in parameter '-m'; bowtie 2, post-filter paramter - # mismatch should no greater than the read length - int_no_mismatches=min(options.int_no_mismatches, options.cutnumber2-options.cutnumber1) - str_no_mismatches=str(int_no_mismatches) + + # -g if options.genome is None: error('-g is a required option') @@ -149,19 +182,17 @@ db_path = os.path.expanduser(os.path.join(options.dbpath, genome_subdir + '_' + options.aligner)) + if not os.path.isdir(db_path): error('Index DIR \"' + genome_subdir + '..\" cannot be found in ' + options.dbpath +'.\n\tPlease run the bs_seeker2-build.py ' 'to create it with the correct parameters for -g, -r, --low, --up and --aligner.') - # handle aligner options - # - # default aligner options aligner_options_defaults = { BOWTIE : { '-e' : 40*int_no_mismatches, '--nomaqround' : True, '--norc' : True, - '-k' : 2, + #'-k' : 2, # -k=2; report two best hits, and filter by error rates '--quiet' : True, '--best' : True, @@ -179,7 +210,7 @@ # run bowtie2 in local mode by default '--local' : '--end-to-end' not in aligner_options, #'--mm' : True, - '-k' : 2 + #'-k' : 2 }, SOAP : { '-v' : int_no_mismatches, '-p' : 2, @@ -238,13 +269,22 @@ else: aligner_title = aligner_title + "-local" + if options.aligner == BOWTIE : + logm("Mode: Bowtie") + elif options.aligner == BOWTIE2 : + if '--end-to-end' not in aligner_options : + logm("Mode: Bowtie2, local alignment") + else : + logm("Mode: Bowtie2, end-to-end alignment") + + tmp_path = tempfile.mkdtemp(prefix='bs_seeker2_%s_-%s-TMP-' % (os.path.split(outfilename)[1], aligner_title ), dir = options.temp_dir) (XS_x, XS_y) = options.XS_filter.split(",") XS_pct = float(XS_x) XS_count = int(XS_y) - logm('Filter for tag XS: #(mCH)/#(all CH)>%f and #(mCH)>%d' % (XS_pct, XS_count)) + logm('Filter for tag XS: #(mCH)/#(all CH)>%.2f%% and #(mCH)>%d' % (XS_pct*100, XS_count)) logm('Temporary directory: %s' % tmp_path) @@ -253,8 +293,8 @@ logm('Single end') aligner_command = aligner_exec + aligner_options_string() + \ - { BOWTIE : ' %(reference_genome)s -f %(input_file)s %(output_file)s', - BOWTIE2 : ' -x %(reference_genome)s -f -U %(input_file)s -S %(output_file)s', + { BOWTIE : ' -k 2 %(reference_genome)s -f %(input_file)s %(output_file)s', + BOWTIE2 : ' -k 2 -x %(reference_genome)s -f -U %(input_file)s -S %(output_file)s', SOAP : ' -D %(reference_genome)s.fa.index -o %(output_file)s -a %(input_file)s', RMAP : ' -c %(reference_genome)s.fa -o %(output_file)s %(input_file)s' }[options.aligner] @@ -263,7 +303,6 @@ if options.rrbs: # RRBS scan bs_rrbs(options.infilename, asktag, - # options.rrbs_taginfo, options.adapter_file, options.cutnumber1, options.cutnumber2, @@ -299,18 +338,18 @@ else: logm('Pair end') # pair end specific default options - aligner_options = dict({BOWTIE: {'--ff' : asktag == 'N', - '--fr' : asktag == 'Y', + aligner_options = dict({BOWTIE: {'--fr' : True, '-X' : options.max_insert_size, - '-I' : options.min_insert_size if options.min_insert_size > 0 else None + '-I' : options.min_insert_size if options.min_insert_size > 0 else None, + '-a' : True # "-k 2" in bowtie would not report the best two }, BOWTIE2 : { - '--ff' : asktag == 'N', - '--fr' : asktag == 'Y', + '--fr' : True, '-X' : options.max_insert_size, '-I' : options.min_insert_size if options.min_insert_size > 0 else None, '--no-discordant' : True, - '--no-mixed' : True + '--no-mixed' : True, + '-k' : 2 }, SOAP: { '-x' : options.max_insert_size, @@ -328,6 +367,11 @@ logm('Aligner command: %s' % aligner_command) + if '--end-to-end' not in aligner_options: + aligner_options_defaults[BOWTIE2].update({'-D' : 50}) + else: + aligner_options_defaults[BOWTIE2].update({'-D' : 50, '-L': 15, '--score-min': 'L,-0.6,-0.6' }) + bs_pair_end(options.infilename_1, options.infilename_2, asktag, @@ -342,6 +386,7 @@ outfile, XS_pct, XS_count, + options.adapter_mismatch, options.Output_multiple_hit )
--- a/BSseeker2/bs_seeker2-build.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_seeker2-build.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import os from optparse import OptionParser, OptionGroup @@ -12,8 +12,8 @@ parser = OptionParser() parser.add_option("-f", "--file", dest="filename", help="Input your reference genome file (fasta)", metavar="FILE") - parser.add_option("--aligner", dest="aligner", help="Aligner program to perform the analysis: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE2) - parser.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Defaults: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), + parser.add_option("--aligner", dest="aligner", help="Aligner program to perform the analysis: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE) + parser.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Detected: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), metavar="PATH") parser.add_option("-d", "--db", type="string", dest="dbpath", help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]", metavar="DBPATH", default = reference_genome_path) @@ -23,7 +23,7 @@ rrbs_opts = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options", "Use this options with conjuction of -r [--rrbs]") rrbs_opts.add_option("-r", "--rrbs", action="store_true", dest="rrbs", help = 'Build index specially for Reduced Representation Bisulfite Sequencing experiments. Genome other than certain fragments will be masked. [Default: %default]', default = False) - rrbs_opts.add_option("-l", "--low",type= "int", dest="low_bound", help="lower bound of fragment length (excluding recognition sequence such as C-CGG) [Default: %default]", default = 40) + rrbs_opts.add_option("-l", "--low",type= "int", dest="low_bound", help="lower bound of fragment length (excluding recognition sequence such as C-CGG) [Default: %default]", default = 20) rrbs_opts.add_option("-u", "--up", type= "int", dest="up_bound", help="upper bound of fragment length (excluding recognition sequence such as C-CGG ends) [Default: %default]", default = 500) rrbs_opts.add_option("-c", "--cut-site", type= "string", dest="cut_format", help="Cut sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", default = "C-CGG") parser.add_option_group(rrbs_opts) @@ -33,7 +33,7 @@ # if no options were given by the user, print help and exit if len(sys.argv) == 1: - print parser.print_help() + parser.print_help() exit(0) if options.version : @@ -44,7 +44,11 @@ rrbs = options.rrbs - fasta_file=os.path.expanduser(options.filename) + if options.filename is not None : + fasta_file=os.path.expanduser(options.filename) + else : + error("Please specify the genome file (Fasta) using \"-f\"") + if fasta_file is None: error('Fasta file for the reference genome must be supported') @@ -69,9 +73,14 @@ print "Reference genome file: %s" % fasta_file print "Reduced Representation Bisulfite Sequencing: %s" % rrbs + print "Short reads aligner you are using: %s" % options.aligner print "Builder path: %s" % builder_exec + #--------------------------------------------------------------- + if not os.path.isfile( builder_exec ) : + error("Cannot file program %s for execution." % builder_exec) + ref_path = options.dbpath if os.path.exists(ref_path):
--- a/BSseeker2/bs_seeker2-call_methylation.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_seeker2-call_methylation.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from optparse import OptionParser, OptionGroup from bs_utils.utils import * @@ -71,7 +71,7 @@ # if no options were given by the user, print help and exit if len(sys.argv) == 1: - print parser.print_help() + parser.print_help() exit(0) if options.version : @@ -133,8 +133,6 @@ nuc, context, subcontext = context_calling(chrom_seq, col.pos) total_reads = 0 - - for pr in col.pileups: # print pr if (not pr.indel) : # skip indels @@ -152,7 +150,6 @@ print 'WARNING: read %s has an invalid alignment. Discarding.. ' % pr.alignment.qname continue read_nuc = pr.alignment.seq[pr.qpos] - # print "read_nuc=", read_nuc if pr.alignment.is_reverse: ATCG_rev[read_nuc] += 1 else: @@ -194,7 +191,8 @@ wiggle.write('%d\t%f\n' % (pos, meth_level)) else : wiggle.write('%d\t-%f\n' % (pos, meth_level)) - CGmap.write('%(chrom)s\t%(nuc)s\t%(pos)d\t%(context)s\t%(subcontext)s\t%(meth_level_string)s\t%(meth_cytosines)s\t%(all_cytosines)s\n' % locals()) + + CGmap.write('%(chrom)s\t%(nuc)s\t%(pos)d\t%(context)s\t%(subcontext)s\t%(meth_level_string)s\t%(meth_cytosines)s\t%(all_cytosines)s\n' % locals()) ATCGmap.close() CGmap.close() wiggle.close()
--- a/BSseeker2/bs_utils/utils.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_utils/utils.py Tue Nov 05 01:55:39 2013 -0500 @@ -23,7 +23,7 @@ def show_version() : print "" - print " BS-Seeker2 v2.0.3 - May 19, 2013 " + print " BS-Seeker2 v2.0.5 - Nov 5, 2013 " print "" @@ -135,14 +135,15 @@ SOAP : '--soap-', RMAP : '--rmap-' } -aligner_path = dict((aligner, os.path.expanduser(find_location(aligner) or default_path)) - for aligner, default_path in - [(BOWTIE,'~/bowtie-0.12.7/'), - (BOWTIE2, '~/bowtie-0.12.7/'), - (SOAP, '~/soap2.21release/'), - (RMAP, '~/rmap_v2.05/bin') - ]) - +#aligner_path = dict((aligner, os.path.expanduser(find_location(aligner) or default_path)) +# for aligner, default_path in +# [(BOWTIE,'~/bowtie/'), +# (BOWTIE2, '~/bowtie2/'), +# (SOAP, '~/soap/'), +# (RMAP, '~/rmap/bin') +# ]) +aligner_path = dict((aligner, os.path.expanduser(find_location(aligner) or "None")) + for aligner in [(BOWTIE), (BOWTIE2), (SOAP), (RMAP)]) reference_genome_path = os.path.join(os.path.split(globals()['__file__'])[0],'reference_genomes') @@ -214,7 +215,10 @@ """ Splits a file (equivalend to UNIX split -l ) """ fno = 0 lno = 0 - INPUT = open(filename, 'r') + if filename.endswith(".gz") : + INPUT = gzip.open(filename, 'rb') + else : + INPUT = open(filename, 'r') output = None for l in INPUT: if lno == 0: @@ -321,7 +325,10 @@ commands = [(cmd[0], open(cmd[1], 'w')) if type(cmd) is tuple else (cmd, None) for cmd in commands] - logm('Starting commands:\n' + '\n'.join([cmd for cmd, stdout in commands])) + #logm('Starting commands:\n' + '\n'.join([cmd for cmd, stdout in commands])) + logm('Starting commands:') + for cmd, stdout in commands : + logm("Launched: "+cmd) for i, proc in enumerate([subprocess.Popen(args = shlex.split(cmd), stdout = stdout) for cmd, stdout in commands]): return_code = proc.wait() logm('Finished: ' + commands[i][0])
--- a/BSseeker2/galaxy/bs_seeker2_wrapper.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/galaxy/bs_seeker2_wrapper.py Tue Nov 05 01:55:39 2013 -0500 @@ -114,7 +114,6 @@ ('_rrbs_%s_%s' % (getopt(args[ALIGN], '-l', '--low', '40'), getopt(args[ALIGN], '-u', '--up', '500')) if len(set(['-r', '--rrbs']) & set(args[ALIGN])) > 0 else '') + - '_' + args[ALIGN]['--aligner']) }) run_prog(os.path.join(path_to_bs_seeker, 'bs_seeker2-call_methylation.py'), args[CALL_METHYLATION])
--- a/BSseeker2/galaxy/bs_seeker2_wrapper.xml Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/galaxy/bs_seeker2_wrapper.xml Tue Nov 05 01:55:39 2013 -0500 @@ -1,255 +1,270 @@ -<tool id="bs_seeker_wrapper" name="BS-Seeker2" version="2.0.0"> - <requirements><requirement type='package'>bs_seeker2</requirement></requirements> - <description>Versatile aligner for bisulfite sequencing data</description> - <command interpreter="python"> - bs_seeker2_wrapper.py - ### define exec path - ### --exec-path "/u/home/galaxy/galaxy/GalaxyTools/bin" - ### [Please change the following path to your local directory] - --exec-path "/Users/weilong/Documents/program/BSseeker2" - ### output - --align--output $align_output - --call_methylation--wig $call_methylation_wig - --call_methylation--CGmap $call_methylation_CGmap - --call_methylation--ATCGmap $call_methylation_ATCGmap - --call_methylation--txt - - #if $singlePaired.sPaired == "paired" - --align--input_1 $input1 - --align--input_2 $singlePaired.input2 - #end if - - - ### aligner - --align--aligner ${choosealigner.aligner} - - ### Index from history or built-in - #if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "history" - --build--file ${choosealigner.rrbsFragments.refGenomeSource.ownFile} - --build--aligner ${choosealigner.aligner} - --align-g ${choosealigner.rrbsFragments.refGenomeSource.ownFile} - --align--db ${choosealigner.rrbsFragments.refGenomeSource.ownFile} - #else if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "indexed" - --align--db ${choosealigner.rrbsFragments.refGenomeSource.index.fields.path} - --align-g ${choosealigner.rrbsFragments.refGenomeSource.index.fields.path}/${choosealigner.rrbsFragments.refGenomeSource.index.fields.dbkey}.fa - - #end if - - ### RRBS or WGBS - #if $choosealigner.rrbsFragments.Fragmented == "Yes" - #if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "history" - --build--rrbs - --build--low ${choosealigner.rrbsFragments.lowerBound} - --build--up ${choosealigner.rrbsFragments.upperBound} - #end if - --align--rrbs - --align--low ${choosealigner.rrbsFragments.lowerBound} - --align--up ${choosealigner.rrbsFragments.upperBound} - #end if - - - - ### Inputs - #if $singlePaired.sPaired == "single" - --align-i $input1 - #end if - - ### Library type - --align-t $tag - - ### other general options - #if $sParams.sSettingsType == "preSet" - --align--start_base 1 - --align--end_base 200 - --align--mis 4 - #end if - - ### adapter information - #if $adapterInfo.useAdapter == "Yes" - --align--adapter ${adapterInfo.adapter_file} - #end if - - #if $sParams.sSettingsType == "full" - --align--start_base ${sParams.start_base} - --align--end_base ${sParams.end_base} - --align--mis ${sParams.num_mismatch} - #end if - - </command> - <inputs> - <param format="fastq,fasta,qseq" name="input1" type="data" label="Input your read file" help="reads file in fastq, qseq or fasta format" /> - <conditional name="singlePaired"> - <param name="sPaired" type="select" label="Is this library mate-paired?"> - <option value="single">Single-end</option> - <option value="paired">Paired-end</option> - </param> - <when value="paired"> - <param format="fastq,fasta,qseq" name="input2" type="data" label="Input your read file 2" help="reads in fastq, qseq or fasta format" /> - <param name="min_ins_distance" type="integer" value="-1" label=" Minimum insert size for valid paired-end alignments" /> - <param name="max_ins_distance" type="integer" value="400" label="Maximum insert size for valid paired-end alignments" /> - </when> - </conditional> - <param name="tag" type="select" label="Type of libraries"> - <option value="N">directional libraries</option> - <option value="Y">undirectional libraries</option> - </param> - <conditional name="choosealigner"> - <param name="aligner" type="select" label="Short reads aligner"> - <option value="bowtie">bowtie</option> - <option value="bowtie2">bowtie2</option> - </param> - <when value="bowtie"> - <conditional name="rrbsFragments"> - <param name="Fragmented" type="select" label="RRBS-seq reads" help=""> - <option value="No">No</option> - <option value="Yes">Yes</option> - </param> - <when value="Yes"> - <param name="lowerBound" type="integer" value="40" label="The lower bound for RRBS fragments" help="Default: 40" /> - <param name="upperBound" type="integer" value="500" label="The upper bound for RRBS fragments" help="Default: 500" /> - <conditional name="refGenomeSource"> - <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help=""> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="index" type="select" label="Select a reference genome (RRBS, bowtie)"> - <options from_data_table="bs_seeker2_indexes_RRBS_bowtie"> - <filter type="sort_by" column="2"/> - <validator type="no_options" message="No indexes are available for the selected input dataset"/> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> - </when> - </conditional> - </when> - <when value="No"> - <conditional name="refGenomeSource"> - <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help=""> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="index" type="select" label="Select a reference genome (WGBS, bowtie)"> - <options from_data_table="bs_seeker2_indexes_WGBS_bowtie"> - <filter type="sort_by" column="2"/> - <validator type="no_options" message="No indexes are available for the selected input dataset"/> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> - </when> - </conditional> - </when> - </conditional> - </when> - - <when value="bowtie2"> - <conditional name="rrbsFragments"> - <param name="Fragmented" type="select" label="RRBS-seq reads" help=""> - <option value="No">No</option> - <option value="Yes">Yes</option> - </param> - <when value="Yes"> - <param name="lowerBound" type="integer" value="40" label="The lower bound for RRBS fragments" help="Default: 40" /> - <param name="upperBound" type="integer" value="500" label="The upper bound for RRBS fragments" help="Default: 500" /> - <conditional name="refGenomeSource"> - <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help=""> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="index" type="select" label="Select a reference genome (RRBS, bowtie2)"> - <options from_data_table="bs_seeker2_indexes_RRBS_bowtie2"> - <filter type="sort_by" column="2"/> - <validator type="no_options" message="No indexes are available for the selected input dataset"/> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> - </when> - </conditional> - </when> - <when value="No"> - <conditional name="refGenomeSource"> - <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help=""> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="index" type="select" label="Select a reference genome (WGBS, bowtie2)"> - <options from_data_table="bs_seeker2_indexes_WGBS_bowtie2"> - <filter type="sort_by" column="2"/> - <validator type="no_options" message="No indexes are available for the selected input dataset"/> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> - </when> - </conditional> - </when> - </conditional> - </when> - </conditional> - <conditional name="adapterInfo"> - <param name="useAdapter" type="select" label="adapter sequence"> - <option value="noAdapter">No</option> - <option value="withAdapter">Yes</option> - </param> - <when value="withAdapter"> - <param format="txt" name="adapter_file" type="data" label="Input file of your adaptor sequences" help="Input text file of your adaptor sequences" /> - </when> - </conditional> - - <conditional name="sParams"> - <param name="sSettingsType" type="select" label="BS Seeker2 settings to use" help="You can use the default settings or set customer values for the BS Seeker2 parameters."> - <option value="preSet">User Defaults</option> - <option value="full">Full parameter list</option> - </param> - <when value="preSet" /> - <when value="full"> - <param name="start_base" type="integer" value="1" label="The start base of the read to be mapped" help="" /> - <param name="end_base" type="integer" value="200" label="The end base of the read to be mapped" help="" /> - - <param name="num_mismatch" type="integer" value="4" label="Number of mismatches" help="(INT) Default: 4" /> - </when> - </conditional> - -</inputs> - - <outputs> - <data format="bam" name="align_output" label="BAM Alignments"> </data> - <data format="wig" name="call_methylation_wig" label="Methylation Levels"> </data> - <data format="tabular" name="call_methylation_CGmap" label="CGmap file"> </data> - <data format="tabular" name="call_methylation_ATCGmap" label="ATCGmap file"> </data> - - </outputs> - <help> -**What it does** - -BS-Seeker2 is a seamlessly pipeline for mapping bisulfite sequencing data and generating detailed DNA methylome. BS-Seeker2 improves mappability by using local alignment, and is tailored for RRBS library by building special index, with higher efficiency and accuracy. This is the Galaxy version of BS-Seeker2. - ------- - -**Resources** - -The homepage for BS-Seeker2 is http://pellegrini.mcdb.ucla.edu/BS_Seeker2/. - -For more information of BS-Seeker2, please refer to https://github.com/BSSeeker/BSseeker2. - ------- - -**Example** - -- Adapter file:: - - AGATCGGAAGAGCACACGTC - - - </help> -</tool> +<tool id="bs_seeker_wrapper" name="BS-Seeker2" version="2.0.0"> + <requirements><requirement type='package'>bs_seeker2</requirement></requirements> + <description>Versatile aligner for bisulfite sequencing data</description> + <command interpreter="python"> + bs_seeker2_wrapper.py + ### define exec path + ### --exec-path "/u/home/galaxy/galaxy/GalaxyTools/bin" + ### [Please change the following path to your local directory] + --exec-path "/Users/weilong/Documents/program/BSseeker2" + ### output + --align--output $align_output + --call_methylation--wig $call_methylation_wig + --call_methylation--CGmap $call_methylation_CGmap + --call_methylation--ATCGmap $call_methylation_ATCGmap + --call_methylation--txt + + #if $singlePaired.sPaired == "paired" + --align--input_1 $input1 + --align--input_2 $singlePaired.input2 + #end if + + + ### aligner + --align--aligner ${choosealigner.aligner} + #if $choosealigner.aligner == "bowtie2" + #if $choosealigner.mode_type == "local" + --align--bt2--local + #else + --align--bt2--end-to-end + #end if + #end if + + ### Index from history or built-in + #if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "history" + --build--file ${choosealigner.rrbsFragments.refGenomeSource.ownFile} + --build--aligner ${choosealigner.aligner} + --align-g ${choosealigner.rrbsFragments.refGenomeSource.ownFile} + --align--db ${choosealigner.rrbsFragments.refGenomeSource.ownFile} + #else if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "indexed" + --align--db ${choosealigner.rrbsFragments.refGenomeSource.index.fields.path} + --align-g ${choosealigner.rrbsFragments.refGenomeSource.index.fields.path}/${choosealigner.rrbsFragments.refGenomeSource.index.fields.dbkey}.fa + + #end if + + ### RRBS or WGBS + #if $choosealigner.rrbsFragments.Fragmented == "Yes" + #if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "history" + --build--rrbs + --build--low ${choosealigner.rrbsFragments.lowerBound} + --build--up ${choosealigner.rrbsFragments.upperBound} + #end if + --align--rrbs + --align--low ${choosealigner.rrbsFragments.lowerBound} + --align--up ${choosealigner.rrbsFragments.upperBound} + #end if + + + + ### Inputs + #if $singlePaired.sPaired == "single" + --align-i $input1 + #end if + + ### Library type + --align-t $tag + + ### other general options + #if $sParams.sSettingsType == "preSet" + --align--start_base 1 + --align--end_base 200 + --align--mis 4 + #end if + + ### adapter information + #if $adapterInfo.useAdapter == "Yes" + --align--adapter ${adapterInfo.adapter_file} + #end if + + #if $sParams.sSettingsType == "full" + --align--start_base ${sParams.start_base} + --align--end_base ${sParams.end_base} + --align--mis ${sParams.num_mismatch} + #end if + + </command> + <inputs> + <param format="fastq,fasta,qseq" name="input1" type="data" label="Input your read file" help="reads file in fastq, qseq or fasta format" /> + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="paired"> + <param format="fastq,fasta,qseq" name="input2" type="data" label="Input your read file 2" help="reads in fastq, qseq or fasta format" /> + <param name="min_ins_distance" type="integer" value="-1" label=" Minimum insert size for valid paired-end alignments" /> + <param name="max_ins_distance" type="integer" value="400" label="Maximum insert size for valid paired-end alignments" /> + </when> + </conditional> + <param name="tag" type="select" label="Type of libraries"> + <option value="N">directional libraries</option> + <option value="Y">undirectional libraries</option> + </param> + <conditional name="choosealigner"> + <param name="aligner" type="select" label="Short reads aligner"> + <option value="bowtie">bowtie</option> + <option value="bowtie2">bowtie2</option> + </param> + <when value="bowtie"> + <conditional name="rrbsFragments"> + <param name="Fragmented" type="select" label="RRBS-seq reads" help=""> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <param name="lowerBound" type="integer" value="40" label="The lower bound for RRBS fragments" help="Default: 40" /> + <param name="upperBound" type="integer" value="500" label="The upper bound for RRBS fragments" help="Default: 500" /> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help=""> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome (RRBS, bowtie)"> + <options from_data_table="bs_seeker2_indexes_RRBS_bowtie"> + <filter type="sort_by" column="2"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + </when> + </conditional> + </when> + <when value="No"> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help=""> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome (WGBS, bowtie)"> + <options from_data_table="bs_seeker2_indexes_WGBS_bowtie"> + <filter type="sort_by" column="2"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + </when> + </conditional> + </when> + </conditional> + </when> + + <when value="bowtie2"> + <conditional name="rrbsFragments"> + <param name="Fragmented" type="select" label="RRBS-seq reads" help=""> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <param name="lowerBound" type="integer" value="40" label="The lower bound for RRBS fragments" help="Default: 40" /> + <param name="upperBound" type="integer" value="500" label="The upper bound for RRBS fragments" help="Default: 500" /> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help=""> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome (RRBS, bowtie2)"> + <options from_data_table="bs_seeker2_indexes_RRBS_bowtie2"> + <filter type="sort_by" column="2"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + </when> + </conditional> + </when> + <when value="No"> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help=""> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome (WGBS, bowtie2)"> + <options from_data_table="bs_seeker2_indexes_WGBS_bowtie2"> + <filter type="sort_by" column="2"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + </when> + </conditional> + </when> + </conditional> + + + <conditional name="mode_type"> + <param name="mode" type="select" label="Select the mode of Bowtie2"> + <option value="local" selected="true"> local alignment </option> + <option value="end_to_end" > end-to-end alignment </option> + </param> + </conditional> + </when> + </conditional> + <conditional name="adapterInfo"> + <param name="useAdapter" type="select" label="adapter sequence"> + <option value="noAdapter">No</option> + <option value="withAdapter">Yes</option> + </param> + <when value="withAdapter"> + <param format="txt" name="adapter_file" type="data" label="Input file of your adaptor sequences" help="Input text file of your adaptor sequences" /> + </when> + </conditional> + + <conditional name="sParams"> + <param name="sSettingsType" type="select" label="BS Seeker2 settings to use" help="You can use the default settings or set customer values for the BS Seeker2 parameters."> + <option value="preSet">User Defaults</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> + <when value="full"> + <param name="start_base" type="integer" value="1" label="The start base of the read to be mapped" help="" /> + <param name="end_base" type="integer" value="200" label="The end base of the read to be mapped" help="" /> + + <param name="num_mismatch" type="integer" value="4" label="Number of mismatches" help="(INT) Default: 4" /> + </when> + </conditional> + +</inputs> + + <outputs> + <data format="bam" name="align_output" label="BAM Alignments"> </data> + <data format="wig" name="call_methylation_wig" label="Methylation Levels"> </data> + <data format="tabular" name="call_methylation_CGmap" label="CGmap file"> </data> + <data format="tabular" name="call_methylation_ATCGmap" label="ATCGmap file"> </data> + + </outputs> + <help> +**What it does** + +BS-Seeker2 is a seamlessly pipeline for mapping bisulfite sequencing data and generating detailed DNA methylome. BS-Seeker2 improves mappability by using local alignment, and is tailored for RRBS library by building special index, with higher efficiency and accuracy. This is the Galaxy version of BS-Seeker2. + +------ + +**Resources** + +The homepage for BS-Seeker2 is http://pellegrini.mcdb.ucla.edu/BS_Seeker2/. + +For more information of BS-Seeker2, please refer to https://github.com/BSSeeker/BSseeker2. + +------ + +**Example** + +- Adapter file:: + + AGATCGGAAGAGCACACGTC + + + </help> +</tool>