# HG changeset patch # User weilong-guo # Date 1383634539 18000 # Node ID 8b26adf64adce597afdb5373dc6cb71e951eb3d7 # Parent e6df770c0e58f3cf00f4997363ee817378a3cd7b V2.0.5 diff -r e6df770c0e58 -r 8b26adf64adc ._BSseeker2 Binary file ._BSseeker2 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.DS_Store Binary file BSseeker2/.DS_Store has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._.DS_Store Binary file BSseeker2/._.DS_Store has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._AUTHORS Binary file BSseeker2/._AUTHORS has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._FilterReads.py Binary file BSseeker2/._FilterReads.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._README.md Binary file BSseeker2/._README.md has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._RELEASE_NOTES Binary file BSseeker2/._RELEASE_NOTES has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._bs_align Binary file BSseeker2/._bs_align has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._bs_index Binary file BSseeker2/._bs_index has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._bs_seeker2-align.py Binary file BSseeker2/._bs_seeker2-align.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._bs_seeker2-build.py Binary file BSseeker2/._bs_seeker2-build.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._bs_seeker2-call_methylation.py Binary file BSseeker2/._bs_seeker2-call_methylation.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._bs_utils Binary file BSseeker2/._bs_utils has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/._galaxy Binary file BSseeker2/._galaxy has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/COMMIT_EDITMSG --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/COMMIT_EDITMSG Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +V2.0.5 diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/HEAD --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/HEAD Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +ref: refs/heads/master diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/config Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,13 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true + ignorecase = true + precomposeunicode = false +[remote "origin"] + url = https://github.com/BSSeeker/BSseeker2.git + fetch = +refs/heads/*:refs/remotes/origin/* +[branch "master"] + remote = origin + merge = refs/heads/master diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/description Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +Unnamed repository; edit this file 'description' to name the repository. diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/applypatch-msg.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/applypatch-msg.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,15 @@ +#!/bin/sh +# +# An example hook script to check the commit log message taken by +# applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. The hook is +# allowed to edit the commit message file. +# +# To enable this hook, rename this file to "applypatch-msg". + +. 
git-sh-setup +test -x "$GIT_DIR/hooks/commit-msg" && + exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"} +: diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/commit-msg.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/commit-msg.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,24 @@ +#!/bin/sh +# +# An example hook script to check the commit log message. +# Called by "git commit" with one argument, the name of the file +# that has the commit message. The hook should exit with non-zero +# status after issuing an appropriate message if it wants to stop the +# commit. The hook is allowed to edit the commit message file. +# +# To enable this hook, rename this file to "commit-msg". + +# Uncomment the below to add a Signed-off-by line to the message. +# Doing this in a hook is a bad idea in general, but the prepare-commit-msg +# hook is more suited to it. +# +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" + +# This example catches duplicate Signed-off-by lines. + +test "" = "$(grep '^Signed-off-by: ' "$1" | + sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || { + echo >&2 Duplicate Signed-off-by lines. + exit 1 +} diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/post-update.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/post-update.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,8 @@ +#!/bin/sh +# +# An example hook script to prepare a packed repository for use over +# dumb transports. +# +# To enable this hook, rename this file to "post-update". + +exec git update-server-info diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/pre-applypatch.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/pre-applypatch.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,14 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed +# by applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-applypatch". + +. git-sh-setup +test -x "$GIT_DIR/hooks/pre-commit" && + exec "$GIT_DIR/hooks/pre-commit" ${1+"$@"} +: diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/pre-commit.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/pre-commit.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,50 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed. +# Called by "git commit" with no arguments. The hook should +# exit with non-zero status after issuing an appropriate message if +# it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-commit". + +if git rev-parse --verify HEAD >/dev/null 2>&1 +then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 +fi + +# If you want to allow non-ascii filenames set this variable to true. +allownonascii=$(git config hooks.allownonascii) + +# Redirect output to stderr. +exec 1>&2 + +# Cross platform projects tend to avoid non-ascii filenames; prevent +# them from being added to the repository. We exploit the fact that the +# printable range starts at the space character and ends with tilde. 
+if [ "$allownonascii" != "true" ] && + # Note that the use of brackets around a tr range is ok here, (it's + # even required, for portability to Solaris 10's /usr/bin/tr), since + # the square bracket bytes happen to fall in the designated range. + test $(git diff --cached --name-only --diff-filter=A -z $against | + LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 +then + echo "Error: Attempt to add a non-ascii file name." + echo + echo "This can cause problems if you want to work" + echo "with people on other platforms." + echo + echo "To be portable it is advisable to rename the file ..." + echo + echo "If you know what you are doing you can disable this" + echo "check using:" + echo + echo " git config hooks.allownonascii true" + echo + exit 1 +fi + +# If there are whitespace errors, print the offending file names and fail. +exec git diff-index --check --cached $against -- diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/pre-push.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/pre-push.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,53 @@ +#!/bin/sh + +# An example hook script to verify what is about to be pushed. Called by "git +# push" after it has checked the remote status, but before anything has been +# pushed. If this script exits with a non-zero status nothing will be pushed. +# +# This hook is called with the following parameters: +# +# $1 -- Name of the remote to which the push is being done +# $2 -- URL to which the push is being done +# +# If pushing without using a named remote those arguments will be equal. +# +# Information about the commits which are being pushed is supplied as lines to +# the standard input in the form: +# +# +# +# This sample shows how to prevent push of commits where the log message starts +# with "WIP" (work in progress). + +remote="$1" +url="$2" + +z40=0000000000000000000000000000000000000000 + +IFS=' ' +while read local_ref local_sha remote_ref remote_sha +do + if [ "$local_sha" = $z40 ] + then + # Handle delete + else + if [ "$remote_sha" = $z40 ] + then + # New branch, examine all commits + range="$local_sha" + else + # Update to existing branch, examine new commits + range="$remote_sha..$local_sha" + fi + + # Check for WIP commit + commit=`git rev-list -n 1 --grep '^WIP' "$range"` + if [ -n "$commit" ] + then + echo "Found WIP commit in $local_ref, not pushing" + exit 1 + fi + fi +done + +exit 0 diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/pre-rebase.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/pre-rebase.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,169 @@ +#!/bin/sh +# +# Copyright (c) 2006, 2008 Junio C Hamano +# +# The "pre-rebase" hook is run just before "git rebase" starts doing +# its job, and can prevent the command from running by exiting with +# non-zero status. +# +# The hook is called with the following parameters: +# +# $1 -- the upstream the series was forked from. +# $2 -- the branch being rebased (or empty when rebasing the current branch). +# +# This sample shows how to prevent topic branches that are already +# merged to 'next' branch from getting rebased, because allowing it +# would result in rebasing already published history. + +publish=next +basebranch="$1" +if test "$#" = 2 +then + topic="refs/heads/$2" +else + topic=`git symbolic-ref HEAD` || + exit 0 ;# we do not interrupt rebasing detached HEAD +fi + +case "$topic" in +refs/heads/??/*) + ;; +*) + exit 0 ;# we do not interrupt others. 
+ ;; +esac + +# Now we are dealing with a topic branch being rebased +# on top of master. Is it OK to rebase it? + +# Does the topic really exist? +git show-ref -q "$topic" || { + echo >&2 "No such branch $topic" + exit 1 +} + +# Is topic fully merged to master? +not_in_master=`git rev-list --pretty=oneline ^master "$topic"` +if test -z "$not_in_master" +then + echo >&2 "$topic is fully merged to master; better remove it." + exit 1 ;# we could allow it, but there is no point. +fi + +# Is topic ever merged to next? If so you should not be rebasing it. +only_next_1=`git rev-list ^master "^$topic" ${publish} | sort` +only_next_2=`git rev-list ^master ${publish} | sort` +if test "$only_next_1" = "$only_next_2" +then + not_in_topic=`git rev-list "^$topic" master` + if test -z "$not_in_topic" + then + echo >&2 "$topic is already up-to-date with master" + exit 1 ;# we could allow it, but there is no point. + else + exit 0 + fi +else + not_in_next=`git rev-list --pretty=oneline ^${publish} "$topic"` + /usr/bin/perl -e ' + my $topic = $ARGV[0]; + my $msg = "* $topic has commits already merged to public branch:\n"; + my (%not_in_next) = map { + /^([0-9a-f]+) /; + ($1 => 1); + } split(/\n/, $ARGV[1]); + for my $elem (map { + /^([0-9a-f]+) (.*)$/; + [$1 => $2]; + } split(/\n/, $ARGV[2])) { + if (!exists $not_in_next{$elem->[0]}) { + if ($msg) { + print STDERR $msg; + undef $msg; + } + print STDERR " $elem->[1]\n"; + } + } + ' "$topic" "$not_in_next" "$not_in_master" + exit 1 +fi + +exit 0 + +################################################################ + +This sample hook safeguards topic branches that have been +published from being rewound. + +The workflow assumed here is: + + * Once a topic branch forks from "master", "master" is never + merged into it again (either directly or indirectly). + + * Once a topic branch is fully cooked and merged into "master", + it is deleted. If you need to build on top of it to correct + earlier mistakes, a new topic branch is created by forking at + the tip of the "master". This is not strictly necessary, but + it makes it easier to keep your history simple. + + * Whenever you need to test or publish your changes to topic + branches, merge them into "next" branch. + +The script, being an example, hardcodes the publish branch name +to be "next", but it is trivial to make it configurable via +$GIT_DIR/config mechanism. + +With this workflow, you would want to know: + +(1) ... if a topic branch has ever been merged to "next". Young + topic branches can have stupid mistakes you would rather + clean up before publishing, and things that have not been + merged into other branches can be easily rebased without + affecting other people. But once it is published, you would + not want to rewind it. + +(2) ... if a topic branch has been fully merged to "master". + Then you can delete it. More importantly, you should not + build on top of it -- other people may already want to + change things related to the topic as patches against your + "master", so if you need further changes, it is better to + fork the topic (perhaps with the same name) afresh from the + tip of "master". + +Let's look at this example: + + o---o---o---o---o---o---o---o---o---o "next" + / / / / + / a---a---b A / / + / / / / + / / c---c---c---c B / + / / / \ / + / / / b---b C \ / + / / / / \ / + ---o---o---o---o---o---o---o---o---o---o---o "master" + + +A, B and C are topic branches. + + * A has one fix since it was merged up to "next". + + * B has finished. 
It has been fully merged up to "master" and "next", + and is ready to be deleted. + + * C has not merged to "next" at all. + +We would want to allow C to be rebased, refuse A, and encourage +B to be deleted. + +To compute (1): + + git rev-list ^master ^topic next + git rev-list ^master next + + if these match, topic has not merged in next at all. + +To compute (2): + + git rev-list master..topic + + if this is empty, it is fully merged to "master". diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/prepare-commit-msg.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/prepare-commit-msg.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,36 @@ +#!/bin/sh +# +# An example hook script to prepare the commit log message. +# Called by "git commit" with the name of the file that has the +# commit message, followed by the description of the commit +# message's source. The hook's purpose is to edit the commit +# message file. If the hook fails with a non-zero status, +# the commit is aborted. +# +# To enable this hook, rename this file to "prepare-commit-msg". + +# This hook includes three examples. The first comments out the +# "Conflicts:" part of a merge commit. +# +# The second includes the output of "git diff --name-status -r" +# into the message, just before the "git status" output. It is +# commented because it doesn't cope with --amend or with squashed +# commits. +# +# The third example adds a Signed-off-by line to the message, that can +# still be edited. This is rarely a good idea. + +case "$2,$3" in + merge,) + /usr/bin/perl -i.bak -ne 's/^/# /, s/^# #/#/ if /^Conflicts/ .. /#/; print' "$1" ;; + +# ,|template,) +# /usr/bin/perl -i.bak -pe ' +# print "\n" . `git diff --cached --name-status -r` +# if /^#/ && $first++ == 0' "$1" ;; + + *) ;; +esac + +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/hooks/update.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/hooks/update.sample Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,128 @@ +#!/bin/sh +# +# An example hook script to blocks unannotated tags from entering. +# Called by "git receive-pack" with arguments: refname sha1-old sha1-new +# +# To enable this hook, rename this file to "update". +# +# Config +# ------ +# hooks.allowunannotated +# This boolean sets whether unannotated tags will be allowed into the +# repository. By default they won't be. +# hooks.allowdeletetag +# This boolean sets whether deleting tags will be allowed in the +# repository. By default they won't be. +# hooks.allowmodifytag +# This boolean sets whether a tag may be modified after creation. By default +# it won't be. +# hooks.allowdeletebranch +# This boolean sets whether deleting branches will be allowed in the +# repository. By default they won't be. +# hooks.denycreatebranch +# This boolean sets whether remotely creating branches will be denied +# in the repository. By default this is allowed. +# + +# --- Command line +refname="$1" +oldrev="$2" +newrev="$3" + +# --- Safety check +if [ -z "$GIT_DIR" ]; then + echo "Don't run this script from the command line." 
>&2 + echo " (if you want, you could supply GIT_DIR then run" >&2 + echo " $0 )" >&2 + exit 1 +fi + +if [ -z "$refname" -o -z "$oldrev" -o -z "$newrev" ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +# --- Config +allowunannotated=$(git config --bool hooks.allowunannotated) +allowdeletebranch=$(git config --bool hooks.allowdeletebranch) +denycreatebranch=$(git config --bool hooks.denycreatebranch) +allowdeletetag=$(git config --bool hooks.allowdeletetag) +allowmodifytag=$(git config --bool hooks.allowmodifytag) + +# check for no description +projectdesc=$(sed -e '1q' "$GIT_DIR/description") +case "$projectdesc" in +"Unnamed repository"* | "") + echo "*** Project description file hasn't been set" >&2 + exit 1 + ;; +esac + +# --- Check types +# if $newrev is 0000...0000, it's a commit to delete a ref. +zero="0000000000000000000000000000000000000000" +if [ "$newrev" = "$zero" ]; then + newrev_type=delete +else + newrev_type=$(git cat-file -t $newrev) +fi + +case "$refname","$newrev_type" in + refs/tags/*,commit) + # un-annotated tag + short_refname=${refname##refs/tags/} + if [ "$allowunannotated" != "true" ]; then + echo "*** The un-annotated tag, $short_refname, is not allowed in this repository" >&2 + echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." >&2 + exit 1 + fi + ;; + refs/tags/*,delete) + # delete tag + if [ "$allowdeletetag" != "true" ]; then + echo "*** Deleting a tag is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/tags/*,tag) + # annotated tag + if [ "$allowmodifytag" != "true" ] && git rev-parse $refname > /dev/null 2>&1 + then + echo "*** Tag '$refname' already exists." >&2 + echo "*** Modifying a tag is not allowed in this repository." >&2 + exit 1 + fi + ;; + refs/heads/*,commit) + # branch + if [ "$oldrev" = "$zero" -a "$denycreatebranch" = "true" ]; then + echo "*** Creating a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/heads/*,delete) + # delete branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/remotes/*,commit) + # tracking branch + ;; + refs/remotes/*,delete) + # delete tracking branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a tracking branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + *) + # Anything else (is there anything else?) + echo "*** Update hook: unknown type of update to ref $refname of type $newrev_type" >&2 + exit 1 + ;; +esac + +# --- Finished +exit 0 diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/index Binary file BSseeker2/.git/index has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/info/exclude --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/info/exclude Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,7 @@ +# git ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. 
+# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ +.DS_Store diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/logs/HEAD --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/logs/HEAD Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,17 @@ +0000000000000000000000000000000000000000 6d252c627030a29eff9cc938f74c0be27ec853f0 Weilong Guo 1365490332 -0700 clone: from https://github.com/BSSeeker/BSseeker2.git +6d252c627030a29eff9cc938f74c0be27ec853f0 565f548bc9a4ffcad63323481e8b03cb78a84b0a weilong 1365495166 -0700 commit: update by Weilong +565f548bc9a4ffcad63323481e8b03cb78a84b0a 1373f3c499ff4b6fb8ddff39e8859fa5326d0098 weilong 1366217039 -0700 commit: Add version track; +1373f3c499ff4b6fb8ddff39e8859fa5326d0098 ffe723a1f313914590bb10496b6d418d8a2ea614 weilong 1366521472 -0700 commit: RRBS un-directional +ffe723a1f313914590bb10496b6d418d8a2ea614 203e3ea9ba50c325d53e45f6d31686b6b23bba41 weilong 1368857623 -0700 commit: Multiple-sites updates +203e3ea9ba50c325d53e45f6d31686b6b23bba41 6aeb6fc0a2180cd14c59d772e05c4c3139f37f2e weilong 1368872339 -0700 commit: Clean-up/Galaxy/Test_data +6aeb6fc0a2180cd14c59d772e05c4c3139f37f2e 4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 weilong 1368872394 -0700 commit: Clean-up/galaxy/test_data +4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 3544b8bb1e5666d100f7652fc63b985187990e49 weilong 1369030442 -0700 commit: Fix bug on utils.py; Increased version; Add UPDATE +3544b8bb1e5666d100f7652fc63b985187990e49 39508bee8506ce06feead7599a53ffd09cc89198 weilong 1369355033 -0700 commit: support diff indexes +39508bee8506ce06feead7599a53ffd09cc89198 99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed weilong 1369785459 -0700 commit: Update Readme +99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed 823880bb2ebba5ae6bd1c7de7dfc4ce03935155b weilong 1369876356 -0700 commit: fix bug in galaxy; chmod +x *.py +823880bb2ebba5ae6bd1c7de7dfc4ce03935155b 7664a927ba6f12b023f6cd72dc407e616c5b85e5 weilong 1370044685 -0700 commit: fix galaxy xml +7664a927ba6f12b023f6cd72dc407e616c5b85e5 3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 weilong 1371202921 -0700 commit: fix bugs for RRBS tag +3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 58de749ac6ebe90f32db2c89cdd72507669695e2 weilong 1371366774 -0700 commit: fix bugs for RRBS tag +58de749ac6ebe90f32db2c89cdd72507669695e2 5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 weilong 1375139287 -0700 commit: v2.0.4 +5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 0c877f0900535fbee6909dc1591e8ad0eac29c52 weilong 1375307754 -0700 commit: fix a bug +0c877f0900535fbee6909dc1591e8ad0eac29c52 85a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8 weilong 1383633776 +0800 commit: V2.0.5 diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/logs/refs/heads/master --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/logs/refs/heads/master Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,17 @@ +0000000000000000000000000000000000000000 6d252c627030a29eff9cc938f74c0be27ec853f0 Weilong Guo 1365490332 -0700 clone: from https://github.com/BSSeeker/BSseeker2.git +6d252c627030a29eff9cc938f74c0be27ec853f0 565f548bc9a4ffcad63323481e8b03cb78a84b0a weilong 1365495166 -0700 commit: update by Weilong +565f548bc9a4ffcad63323481e8b03cb78a84b0a 1373f3c499ff4b6fb8ddff39e8859fa5326d0098 weilong 1366217039 -0700 commit: Add version track; +1373f3c499ff4b6fb8ddff39e8859fa5326d0098 ffe723a1f313914590bb10496b6d418d8a2ea614 weilong 1366521472 -0700 commit: RRBS un-directional +ffe723a1f313914590bb10496b6d418d8a2ea614 
203e3ea9ba50c325d53e45f6d31686b6b23bba41 weilong 1368857623 -0700 commit: Multiple-sites updates +203e3ea9ba50c325d53e45f6d31686b6b23bba41 6aeb6fc0a2180cd14c59d772e05c4c3139f37f2e weilong 1368872339 -0700 commit: Clean-up/Galaxy/Test_data +6aeb6fc0a2180cd14c59d772e05c4c3139f37f2e 4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 weilong 1368872394 -0700 commit: Clean-up/galaxy/test_data +4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 3544b8bb1e5666d100f7652fc63b985187990e49 weilong 1369030442 -0700 commit: Fix bug on utils.py; Increased version; Add UPDATE +3544b8bb1e5666d100f7652fc63b985187990e49 39508bee8506ce06feead7599a53ffd09cc89198 weilong 1369355033 -0700 commit: support diff indexes +39508bee8506ce06feead7599a53ffd09cc89198 99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed weilong 1369785459 -0700 commit: Update Readme +99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed 823880bb2ebba5ae6bd1c7de7dfc4ce03935155b weilong 1369876356 -0700 commit: fix bug in galaxy; chmod +x *.py +823880bb2ebba5ae6bd1c7de7dfc4ce03935155b 7664a927ba6f12b023f6cd72dc407e616c5b85e5 weilong 1370044685 -0700 commit: fix galaxy xml +7664a927ba6f12b023f6cd72dc407e616c5b85e5 3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 weilong 1371202921 -0700 commit: fix bugs for RRBS tag +3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 58de749ac6ebe90f32db2c89cdd72507669695e2 weilong 1371366774 -0700 commit: fix bugs for RRBS tag +58de749ac6ebe90f32db2c89cdd72507669695e2 5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 weilong 1375139287 -0700 commit: v2.0.4 +5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 0c877f0900535fbee6909dc1591e8ad0eac29c52 weilong 1375307754 -0700 commit: fix a bug +0c877f0900535fbee6909dc1591e8ad0eac29c52 85a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8 weilong 1383633776 +0800 commit: V2.0.5 diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/logs/refs/remotes/origin/HEAD --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/logs/refs/remotes/origin/HEAD Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +0000000000000000000000000000000000000000 6d252c627030a29eff9cc938f74c0be27ec853f0 Weilong Guo 1365490332 -0700 clone: from https://github.com/BSSeeker/BSseeker2.git diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/logs/refs/remotes/origin/master --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/logs/refs/remotes/origin/master Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,15 @@ +6d252c627030a29eff9cc938f74c0be27ec853f0 565f548bc9a4ffcad63323481e8b03cb78a84b0a weilong 1365495210 -0700 push: fast forward +565f548bc9a4ffcad63323481e8b03cb78a84b0a 1373f3c499ff4b6fb8ddff39e8859fa5326d0098 weilong 1366217075 -0700 push: fast forward +1373f3c499ff4b6fb8ddff39e8859fa5326d0098 ffe723a1f313914590bb10496b6d418d8a2ea614 weilong 1366521489 -0700 update by push +ffe723a1f313914590bb10496b6d418d8a2ea614 203e3ea9ba50c325d53e45f6d31686b6b23bba41 weilong 1368857637 -0700 update by push +203e3ea9ba50c325d53e45f6d31686b6b23bba41 4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 weilong 1368872449 -0700 update by push +4a0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 3544b8bb1e5666d100f7652fc63b985187990e49 weilong 1369030453 -0700 update by push +3544b8bb1e5666d100f7652fc63b985187990e49 39508bee8506ce06feead7599a53ffd09cc89198 weilong 1369355040 -0700 update by push +39508bee8506ce06feead7599a53ffd09cc89198 99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed weilong 1369785465 -0700 update by push +99e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed 823880bb2ebba5ae6bd1c7de7dfc4ce03935155b weilong 1369876368 -0700 update by push +823880bb2ebba5ae6bd1c7de7dfc4ce03935155b 
7664a927ba6f12b023f6cd72dc407e616c5b85e5 weilong 1370044698 -0700 update by push +7664a927ba6f12b023f6cd72dc407e616c5b85e5 3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 weilong 1371202931 -0700 update by push +3c14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 58de749ac6ebe90f32db2c89cdd72507669695e2 weilong 1371366804 -0700 update by push +58de749ac6ebe90f32db2c89cdd72507669695e2 5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 weilong 1375139295 -0700 update by push +5ffa6c0f3f3d1b78e2899cc1ad9636affb5f2330 0c877f0900535fbee6909dc1591e8ad0eac29c52 weilong 1375307762 -0700 update by push +0c877f0900535fbee6909dc1591e8ad0eac29c52 85a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8 weilong 1383633798 +0800 update by push diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/00/617de95ccde6a4587513924dd8c377c282372a Binary file BSseeker2/.git/objects/00/617de95ccde6a4587513924dd8c377c282372a has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/02/23d68962616c5035b8f8a49d09eb9fea3bd4d6 Binary file BSseeker2/.git/objects/02/23d68962616c5035b8f8a49d09eb9fea3bd4d6 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/02/bff5729f69b469b4ec5a1c6df7d83a0c3f84df Binary file BSseeker2/.git/objects/02/bff5729f69b469b4ec5a1c6df7d83a0c3f84df has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/02/f21118110bf12e3995381d0b5388ecc39f396b Binary file BSseeker2/.git/objects/02/f21118110bf12e3995381d0b5388ecc39f396b has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/04/bd24805dff9825629951311783935221cb83bf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/04/bd24805dff9825629951311783935221cb83bf Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,4 @@ +x+)JMU054c040031Qp  +fw}nyP]0P[fNIjQPjbJ^A%>Y3v~! $.]攸tO_۷/&@TI2N 礴>}b)\If^Jj[7&8^l!sedWP)NMN-2lVl}RfNIwzU*ZʓJ3sR@_na-{dfS vv`*ONɉM-ɨI,|13mJcm8Ғ̜bŸh +`ݺgǍ+NC'$VT2(Ni[4r}jm[C(I-.OI,Id|XܫY \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/08/a0832c5412197e32227afc7d6258f33e30c5b7 Binary file BSseeker2/.git/objects/08/a0832c5412197e32227afc7d6258f33e30c5b7 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/0a/043f3a26f2ffb733bf9d53aa494fc82a50f3b8 Binary file BSseeker2/.git/objects/0a/043f3a26f2ffb733bf9d53aa494fc82a50f3b8 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/0b/a9fcc6f7d8306fdd778b555380015a7e13df2b Binary file BSseeker2/.git/objects/0b/a9fcc6f7d8306fdd778b555380015a7e13df2b has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/0c/7195e7ec701f0acea6d3068335b72f44ef6606 Binary file BSseeker2/.git/objects/0c/7195e7ec701f0acea6d3068335b72f44ef6606 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/0c/877f0900535fbee6909dc1591e8ad0eac29c52 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/0c/877f0900535fbee6909dc1591e8ad0eac29c52 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +x[ +0E*fJɣ2fb1RRt qp24R ga% fȳ%sXQ/sZPp69NN)1S@"ًEԊ˲gkѲ^uYGa_*Y>@zE \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/10/9a8400e787d00b1f566e9dd58f6a5301ae0b81 Binary file BSseeker2/.git/objects/10/9a8400e787d00b1f566e9dd58f6a5301ae0b81 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/11/35d4173810162c03f791537225d87a45b02f88 Binary file BSseeker2/.git/objects/11/35d4173810162c03f791537225d87a45b02f88 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/13/73f3c499ff4b6fb8ddff39e8859fa5326d0098 --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/13/73f3c499ff4b6fb8ddff39e8859fa5326d0098 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +xm0Dsfۀ EA`rŀCm0̣ZJ}xf&s$zUFaVוk9񍍏λiF; C0(Q?zl=_n~Z@ﵚ$')hǿ'-%xrz@oH_q#vvp ?, ~mWx \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/15/910301bc200b6f069dd75caa81101064c2fccf Binary file BSseeker2/.git/objects/15/910301bc200b6f069dd75caa81101064c2fccf has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/17/109affd07fb12192968a793a7f362231132b24 Binary file BSseeker2/.git/objects/17/109affd07fb12192968a793a7f362231132b24 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/18/3503abbe78455abef73ad3e1a9c7615414b8a8 Binary file BSseeker2/.git/objects/18/3503abbe78455abef73ad3e1a9c7615414b8a8 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/19/bf07adb9f50b72bc4fa40ffe059f17c84826fd Binary file BSseeker2/.git/objects/19/bf07adb9f50b72bc4fa40ffe059f17c84826fd has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/1d/6a5d7a3f6280296c748b5746af2cf97a242a77 Binary file BSseeker2/.git/objects/1d/6a5d7a3f6280296c748b5746af2cf97a242a77 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/1d/953d2c1096b42550676cf3c89852b5e2effa91 Binary file BSseeker2/.git/objects/1d/953d2c1096b42550676cf3c89852b5e2effa91 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/1f/6bef43eec7df888917650e5866ab1cc751fd72 Binary file BSseeker2/.git/objects/1f/6bef43eec7df888917650e5866ab1cc751fd72 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/1f/b3a026502e7eb0b3e9a214424e7469c558fde1 Binary file BSseeker2/.git/objects/1f/b3a026502e7eb0b3e9a214424e7469c558fde1 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/20/3e3ea9ba50c325d53e45f6d31686b6b23bba41 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/20/3e3ea9ba50c325d53e45f6d31686b6b23bba41 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +x 0DsVjABH+k2i?>fxÃڲ.5¥o2Ƭ٧D:E"E^<=hJ̙F#;AE H390jp=|_ zr.F9(8xλR~K˙jQ;- J)ΰSr?ihu&^RR \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/34/dfdab6463a8270332c510b91e669a40e7bde95 Binary file BSseeker2/.git/objects/34/dfdab6463a8270332c510b91e669a40e7bde95 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/35/44b8bb1e5666d100f7652fc63b985187990e49 Binary file BSseeker2/.git/objects/35/44b8bb1e5666d100f7652fc63b985187990e49 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/36/3573c4a1889f8a89136bea77502da4e4caf31d Binary file BSseeker2/.git/objects/36/3573c4a1889f8a89136bea77502da4e4caf31d has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/36/e0c12ffea02da8b86f0eb70d13eb4bd1c5e52f Binary file BSseeker2/.git/objects/36/e0c12ffea02da8b86f0eb70d13eb4bd1c5e52f has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/37/15a507ba242e7ffee322e83bb88495bffdfa84 Binary file BSseeker2/.git/objects/37/15a507ba242e7ffee322e83bb88495bffdfa84 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/38/858de2e6dacf0ebf77379a9cb7a1f9115d1f58 Binary file BSseeker2/.git/objects/38/858de2e6dacf0ebf77379a9cb7a1f9115d1f58 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/39/508bee8506ce06feead7599a53ffd09cc89198 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/39/508bee8506ce06feead7599a53ffd09cc89198 Tue Nov 05 
01:55:39 2013 -0500 @@ -0,0 +1,4 @@ +xQ + D)-B)J5b =~7`xj)ti;3m Ib| +h=!1ER^'#mym?z:碒2 z"ɆtWy ˔[ +a!URN/qn[Ĝ5IH \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/39/d7071eae44ec7b52c624a4b24c7c67101f1b68 Binary file BSseeker2/.git/objects/39/d7071eae44ec7b52c624a4b24c7c67101f1b68 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3a/fc1efb6bad2ca078808ac2df9006ca3ff82a8c Binary file BSseeker2/.git/objects/3a/fc1efb6bad2ca078808ac2df9006ca3ff82a8c has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3b/4abe03399682b05bdffc0b5601127cbcf59234 Binary file BSseeker2/.git/objects/3b/4abe03399682b05bdffc0b5601127cbcf59234 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3b/f7e94c85fcc3d67cebd0e19a84bfc00aa8605f Binary file BSseeker2/.git/objects/3b/f7e94c85fcc3d67cebd0e19a84bfc00aa8605f has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3c/14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/3c/14c7a6fc9fd01ef7ffc3bc3b16153e20fda931 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +xAj0E) #PBH#`GőI/z>ǺsWTArgU 6KcBϣ#ᐭt]1KG`C2x[7x{}-+9#Ѝ s;Mh\цI \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3c/274ddc1164d826c6e9dddb7faf80d5d31f894c Binary file BSseeker2/.git/objects/3c/274ddc1164d826c6e9dddb7faf80d5d31f894c has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3e/976df32e78806e4ff93a8e75059abdd0ea7181 Binary file BSseeker2/.git/objects/3e/976df32e78806e4ff93a8e75059abdd0ea7181 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3e/f678dd2144fe82b422f173440c6f31a3430286 Binary file BSseeker2/.git/objects/3e/f678dd2144fe82b422f173440c6f31a3430286 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3f/cc7ab24c5c00cb2cfa7da07cb73427f00a6a93 Binary file BSseeker2/.git/objects/3f/cc7ab24c5c00cb2cfa7da07cb73427f00a6a93 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/3f/cd4fd599b9ec2a2ebf8da4875fdee78051e7cd Binary file BSseeker2/.git/objects/3f/cd4fd599b9ec2a2ebf8da4875fdee78051e7cd has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/41/2eeda78dc9de1186c2e0e1526764af82ab3431 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/41/2eeda78dc9de1186c2e0e1526764af82ab3431 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +xuN1 YԭCXn@:!QbAW?, 9qUD]\&xBcB Qv֭ +O.0A<%+1pr.̅߯`@odXT?)'o"a+AK6k8V4y&ofk)M?Z7} \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/41/ae40d93e4224bc6438f6ef14c15a2846138065 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/41/ae40d93e4224bc6438f6ef14c15a2846138065 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +xM +0]򟀈n<7xI_jmJLoow 7<ΥCoD(!!1J% &,2YbsK:ÎgkhAp֟qo*S]F~8cN.?`{_טR v2:'M4 \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/41/c210ea7e3658ae2eb5d2da7be475da7d548894 Binary file BSseeker2/.git/objects/41/c210ea7e3658ae2eb5d2da7be475da7d548894 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/41/f0f93dc10a62d752d9ee5eb43effcd2dbe19fe Binary file BSseeker2/.git/objects/41/f0f93dc10a62d752d9ee5eb43effcd2dbe19fe has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/41/fc8bda8be2cb62409902674879a6ca93d84cc0 Binary file BSseeker2/.git/objects/41/fc8bda8be2cb62409902674879a6ca93d84cc0 has changed diff -r e6df770c0e58 
-r 8b26adf64adc BSseeker2/.git/objects/43/5a4593e613fc1025f92891a8f08ac771ae1398 Binary file BSseeker2/.git/objects/43/5a4593e613fc1025f92891a8f08ac771ae1398 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/46/87efc5d890418714cba363381cd4b311932fea Binary file BSseeker2/.git/objects/46/87efc5d890418714cba363381cd4b311932fea has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/46/ebbc533f9479de561eefbf9feba9ce603e996c Binary file BSseeker2/.git/objects/46/ebbc533f9479de561eefbf9feba9ce603e996c has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/49/738c43a9d49817fac7d108233edfa40af27bdb Binary file BSseeker2/.git/objects/49/738c43a9d49817fac7d108233edfa40af27bdb has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/4a/0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 Binary file BSseeker2/.git/objects/4a/0fe7b0dfa14e5b4b62b5ead7a624f7361fee12 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/4a/f64fdf97c823621c4259e7319c6484cbcbc8a5 Binary file BSseeker2/.git/objects/4a/f64fdf97c823621c4259e7319c6484cbcbc8a5 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/4c/b483bc8f85c2254f2f4adbddb13ca48d9d3048 Binary file BSseeker2/.git/objects/4c/b483bc8f85c2254f2f4adbddb13ca48d9d3048 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/4d/9abcd6467711a66114b3115856cf2e1642b18f Binary file BSseeker2/.git/objects/4d/9abcd6467711a66114b3115856cf2e1642b18f has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/4e/6d15d27856015a2bf2fd6e403759db1e9269d6 Binary file BSseeker2/.git/objects/4e/6d15d27856015a2bf2fd6e403759db1e9269d6 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/4e/cf1ddb295944cf8b4b45aafeed8c19bd0f3b06 Binary file BSseeker2/.git/objects/4e/cf1ddb295944cf8b4b45aafeed8c19bd0f3b06 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/50/cfd7aeafd7d1d2300fda58c59e2d329c26d798 Binary file BSseeker2/.git/objects/50/cfd7aeafd7d1d2300fda58c59e2d329c26d798 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/51/0ec79f82b6b5a087fd18a04fe11944f7aede94 Binary file BSseeker2/.git/objects/51/0ec79f82b6b5a087fd18a04fe11944f7aede94 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/52/37b06ffa3a3f772f087c79e58746e1008813e9 Binary file BSseeker2/.git/objects/52/37b06ffa3a3f772f087c79e58746e1008813e9 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/53/817ffe5f20347c0b6e705b574b2c6a95a93941 Binary file BSseeker2/.git/objects/53/817ffe5f20347c0b6e705b574b2c6a95a93941 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/56/5f548bc9a4ffcad63323481e8b03cb78a84b0a --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/56/5f548bc9a4ffcad63323481e8b03cb78a84b0a Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +x[ +0E*fdLq~1֔"ފnÁý؀m5Kq;2Hd6]Z*ǣ@^J)y%BNRoMAv+GMv /< \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/68/bbb4104520295cd1bba6d38dc4c8ef557578e1 Binary file BSseeker2/.git/objects/68/bbb4104520295cd1bba6d38dc4c8ef557578e1 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/6a/eb6fc0a2180cd14c59d772e05c4c3139f37f2e Binary file BSseeker2/.git/objects/6a/eb6fc0a2180cd14c59d772e05c4c3139f37f2e has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/6a/f6e8c4f3e98fbe0a3668cc0bd49b400f78c2b7 Binary file 
BSseeker2/.git/objects/6a/f6e8c4f3e98fbe0a3668cc0bd49b400f78c2b7 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/6b/b6ae1a79da1e04586d6f3273b012fc995bf33f Binary file BSseeker2/.git/objects/6b/b6ae1a79da1e04586d6f3273b012fc995bf33f has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/6c/a474f44b26d591aab232bb4f543094f60662fa Binary file BSseeker2/.git/objects/6c/a474f44b26d591aab232bb4f543094f60662fa has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/6d/252c627030a29eff9cc938f74c0be27ec853f0 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/6d/252c627030a29eff9cc938f74c0be27ec853f0 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,2 @@ +xM +0F]23"5))^ܻރ_աil9kǘhdžzر%1Qo.bPfo`HIQ A^uE_4_Ky {\X=z椾@F \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/6d/84df45d981b431b64262b0f9fc1cb4fca7d424 Binary file BSseeker2/.git/objects/6d/84df45d981b431b64262b0f9fc1cb4fca7d424 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/6e/4b91491dbf503844b132902207b10bb675a073 Binary file BSseeker2/.git/objects/6e/4b91491dbf503844b132902207b10bb675a073 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/6e/5dc58b1699ff1e665477c08becaf9435b42677 Binary file BSseeker2/.git/objects/6e/5dc58b1699ff1e665477c08becaf9435b42677 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/71/5f8471c2dc3e60e99dfc042aed1b46bb9cc095 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/71/5f8471c2dc3e60e99dfc042aed1b46bb9cc095 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +x+)JMU04d040031Q,+d([jtcBO0YUVTTTRi˟uU%=8 tt[\4c.^vȉc4 \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/71/860c94d65f6a776e5d62cdbb682d096bd7ced1 Binary file BSseeker2/.git/objects/71/860c94d65f6a776e5d62cdbb682d096bd7ced1 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/71/af4f4bcb6d83a044fe02aff92802eb493274c4 Binary file BSseeker2/.git/objects/71/af4f4bcb6d83a044fe02aff92802eb493274c4 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/72/e59cf052f86f276e4667b637357f2e57e999e1 Binary file BSseeker2/.git/objects/72/e59cf052f86f276e4667b637357f2e57e999e1 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/74/0ea86532d893a100102756643fa0bbcac034a8 Binary file BSseeker2/.git/objects/74/0ea86532d893a100102756643fa0bbcac034a8 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/74/11c2b2e56b60e4f412a81af1b7607d83ff0617 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/74/11c2b2e56b60e4f412a81af1b7607d83ff0617 Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,5 @@ +x]Ro0߳Ov{Lb njCN Щwsչ +O5R[=c; ¢x(c'H̋ w_bG6f8Yzh`78eLa0&߀0qUPBzva;r0@齫mxиz<>3׉DҘcG4+xuc0ؚ0"}ݍ i]wd 4>*=: \c5Xuַ41`yD>>:uO^z(yщ0=Rb(8lbm@R~p]^Zƒ_1We^zPIe+ߖ*s ymϨt\-;8a&PKJ,`2 xY;WVv(=K~,@(Q+&כT +,N̞asY];GP^%W2za.s6\ioS`UH l&B~DV(V_ew \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/75/2b598979416c079bebe57d9dee4fc00d816a5d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/75/2b598979416c079bebe57d9dee4fc00d816a5d Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,3 @@ +xmSn09_qFIC!kih`MBS;ci}=&A@*G>s͕!UpFq4 Y ,H"ϐ7%H EuL#H9eJ=%|jT v5g^*]WQaKfۊP[1?&tj"p> + @,6ZcءqRpkHV0)YjD`}<pؘ̒nOJ(Hraݾ+?GPBvZjif]59{)hAARW 90&ʱUS,ьODy#e}ͨ +hP_%m;,QM^}zBq7F-^ d !gNk}!?>^oxkVw>:rYYp>D~`Mgr 0̸͖,x,h # 9e=pBc)TLa&qa* s8(*_ƃARܿd)!np1`~Atx~$~hՐDs@ 
J \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/76/64a927ba6f12b023f6cd72dc407e616c5b85e5 Binary file BSseeker2/.git/objects/76/64a927ba6f12b023f6cd72dc407e616c5b85e5 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/77/2dab1d09e09848fe92a238fc1413ac86b78e49 Binary file BSseeker2/.git/objects/77/2dab1d09e09848fe92a238fc1413ac86b78e49 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/77/5412491f6a25e65dfc8220fbb80afdee92e19d Binary file BSseeker2/.git/objects/77/5412491f6a25e65dfc8220fbb80afdee92e19d has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/78/f0cc2d5679fac527ded2d199b7723d2858a609 Binary file BSseeker2/.git/objects/78/f0cc2d5679fac527ded2d199b7723d2858a609 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/7b/ded3bdfd5c7cde1ac7a66a94e83d4352632d5b Binary file BSseeker2/.git/objects/7b/ded3bdfd5c7cde1ac7a66a94e83d4352632d5b has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/7c/2f8e591b0355cf96ad95f887ce43c9ad9b6ccb Binary file BSseeker2/.git/objects/7c/2f8e591b0355cf96ad95f887ce43c9ad9b6ccb has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/7c/61cced8cd621d9b25c26438f9bb2270e9bdad2 Binary file BSseeker2/.git/objects/7c/61cced8cd621d9b25c26438f9bb2270e9bdad2 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/7c/ca75230355b000206fa058bb095903f55aa063 Binary file BSseeker2/.git/objects/7c/ca75230355b000206fa058bb095903f55aa063 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/7d/70d64ee9046b29c7a3bd6b8bf6f87813ab3bc6 Binary file BSseeker2/.git/objects/7d/70d64ee9046b29c7a3bd6b8bf6f87813ab3bc6 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/7e/5487ac62878708fcf58fcdb0fcd13acfc37b2f Binary file BSseeker2/.git/objects/7e/5487ac62878708fcf58fcdb0fcd13acfc37b2f has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/7e/64ec5f40ce0bd0a35f104af23729323714b648 Binary file BSseeker2/.git/objects/7e/64ec5f40ce0bd0a35f104af23729323714b648 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/82/3880bb2ebba5ae6bd1c7de7dfc4ce03935155b Binary file BSseeker2/.git/objects/82/3880bb2ebba5ae6bd1c7de7dfc4ce03935155b has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/82/ccc4a40d249856388e2a34e02f4ec42dce1b5f Binary file BSseeker2/.git/objects/82/ccc4a40d249856388e2a34e02f4ec42dce1b5f has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/83/93a0ab5e551d7c5221a1aee43d915a31a6bbd7 Binary file BSseeker2/.git/objects/83/93a0ab5e551d7c5221a1aee43d915a31a6bbd7 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/84/06d09075408fde885ddfe8c4e183a75e0b959e Binary file BSseeker2/.git/objects/84/06d09075408fde885ddfe8c4e183a75e0b959e has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/84/a6806795f21378c36629e61282f8bbaa95d9dc Binary file BSseeker2/.git/objects/84/a6806795f21378c36629e61282f8bbaa95d9dc has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/84/c267bfba017386e573583b5190da45d513d274 Binary file BSseeker2/.git/objects/84/c267bfba017386e573583b5190da45d513d274 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/84/c8795a8d3adf0ee8bb820608886f1e833d70f3 Binary file BSseeker2/.git/objects/84/c8795a8d3adf0ee8bb820608886f1e833d70f3 has changed diff -r e6df770c0e58 -r 8b26adf64adc 
BSseeker2/.git/objects/85/1f792a7fa3077c4a3b2053e7ece53db1e1b5d1 Binary file BSseeker2/.git/objects/85/1f792a7fa3077c4a3b2053e7ece53db1e1b5d1 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/85/a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8 Binary file BSseeker2/.git/objects/85/a936d6dab3f3c5585bcab2eee7b18bbc7d7fd8 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/86/5761b47a7c85b4d343beeb7c9c3ed640601ebf Binary file BSseeker2/.git/objects/86/5761b47a7c85b4d343beeb7c9c3ed640601ebf has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/86/fd03bf2e7dda2de53aa0a7292173a5bf2b4bc7 Binary file BSseeker2/.git/objects/86/fd03bf2e7dda2de53aa0a7292173a5bf2b4bc7 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/87/6457ba39990e75afb27c7243da61fdd85bfdf3 Binary file BSseeker2/.git/objects/87/6457ba39990e75afb27c7243da61fdd85bfdf3 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/88/496377d81be7202c5a385ed7e5df52778c9f9d Binary file BSseeker2/.git/objects/88/496377d81be7202c5a385ed7e5df52778c9f9d has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/8b/159869b8124569a96b7e4e841ea3658a75db57 Binary file BSseeker2/.git/objects/8b/159869b8124569a96b7e4e841ea3658a75db57 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/8d/30a2f0de740bd1cbe9346cf486bb360881e7b0 Binary file BSseeker2/.git/objects/8d/30a2f0de740bd1cbe9346cf486bb360881e7b0 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/8f/0dc7039482a4fed3a0f4ee6c0dae8e61f38d91 Binary file BSseeker2/.git/objects/8f/0dc7039482a4fed3a0f4ee6c0dae8e61f38d91 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/8f/586f3d54f92f4c295f1090fc15e61d3788c4fd Binary file BSseeker2/.git/objects/8f/586f3d54f92f4c295f1090fc15e61d3788c4fd has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/8f/640235eeddfaf50d208e68adf6e08ff96348fb Binary file BSseeker2/.git/objects/8f/640235eeddfaf50d208e68adf6e08ff96348fb has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/8f/b12719f1dba55da26896e09c090e385f6f1fe6 Binary file BSseeker2/.git/objects/8f/b12719f1dba55da26896e09c090e385f6f1fe6 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/91/2dad6a9bcc2294cd04131ab7f06ffce58435c5 Binary file BSseeker2/.git/objects/91/2dad6a9bcc2294cd04131ab7f06ffce58435c5 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/91/2ef7d6168979afb5b56ed40120b4c620809da7 Binary file BSseeker2/.git/objects/91/2ef7d6168979afb5b56ed40120b4c620809da7 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/92/1edcff26ba7b7f7a2d6f2cedece007e8006c9d Binary file BSseeker2/.git/objects/92/1edcff26ba7b7f7a2d6f2cedece007e8006c9d has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/94/29632a76fafb840eeddf179810a2e8cfc18b44 Binary file BSseeker2/.git/objects/94/29632a76fafb840eeddf179810a2e8cfc18b44 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/94/a9ed024d3859793618152ea559a168bbcbb5e2 Binary file BSseeker2/.git/objects/94/a9ed024d3859793618152ea559a168bbcbb5e2 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/95/1e8892050fa8255989993946a403d42dba05a2 Binary file BSseeker2/.git/objects/95/1e8892050fa8255989993946a403d42dba05a2 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/97/02043009a1c30882ee8efd386919b9775419c8 Binary file 
BSseeker2/.git/objects/97/02043009a1c30882ee8efd386919b9775419c8 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/97/2d65097162fdbd73a19797366806e2b72c0861 Binary file BSseeker2/.git/objects/97/2d65097162fdbd73a19797366806e2b72c0861 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/97/bb6dd7f2cf43da4acb857b2d048ec87cb996e1 Binary file BSseeker2/.git/objects/97/bb6dd7f2cf43da4acb857b2d048ec87cb996e1 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/99/9c2eacdd1b0da3bd211cbfa7e83183e3ae1ab0 Binary file BSseeker2/.git/objects/99/9c2eacdd1b0da3bd211cbfa7e83183e3ae1ab0 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/99/e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed Binary file BSseeker2/.git/objects/99/e2633f8338b3c1f44cf3d1e2fa7ed5e3ce73ed has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/9a/5920e30c012e1eba7a20047f4f85a48ecab04c Binary file BSseeker2/.git/objects/9a/5920e30c012e1eba7a20047f4f85a48ecab04c has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/9c/2b8bf5a1046ca6f8fdc87146c8600a4c0b863b Binary file BSseeker2/.git/objects/9c/2b8bf5a1046ca6f8fdc87146c8600a4c0b863b has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/9c/390872015d7873ebd00a98d32bbc5913414730 Binary file BSseeker2/.git/objects/9c/390872015d7873ebd00a98d32bbc5913414730 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/9d/2bc37995bb4a9434575616e95201c82fd08e02 Binary file BSseeker2/.git/objects/9d/2bc37995bb4a9434575616e95201c82fd08e02 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/9d/dd63485ec0ed6f3a7b0bd65593580ad8e18b41 Binary file BSseeker2/.git/objects/9d/dd63485ec0ed6f3a7b0bd65593580ad8e18b41 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/9e/dfef5327bb169e27dac0a8541df3e116e13e53 Binary file BSseeker2/.git/objects/9e/dfef5327bb169e27dac0a8541df3e116e13e53 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/a3/dc53707171c1390211069269899dd310e00210 Binary file BSseeker2/.git/objects/a3/dc53707171c1390211069269899dd310e00210 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/a4/01ac91383e16c013d76a4db4f4fc8a7bfe38b4 Binary file BSseeker2/.git/objects/a4/01ac91383e16c013d76a4db4f4fc8a7bfe38b4 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/a4/bfd08fca2bffe134f8499cd0b89002bcb8fd1c Binary file BSseeker2/.git/objects/a4/bfd08fca2bffe134f8499cd0b89002bcb8fd1c has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/a9/4159479577e1278a51348d1b2e7e29c5dc24f0 Binary file BSseeker2/.git/objects/a9/4159479577e1278a51348d1b2e7e29c5dc24f0 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/ac/61e2ad7235c9b954450e267b95134404f60521 Binary file BSseeker2/.git/objects/ac/61e2ad7235c9b954450e267b95134404f60521 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/ac/9fbd06659e06888188b5640e990f10720d0112 Binary file BSseeker2/.git/objects/ac/9fbd06659e06888188b5640e990f10720d0112 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/ae/8a4ec09c62f840326244374ad5a5bfdf226bd6 Binary file BSseeker2/.git/objects/ae/8a4ec09c62f840326244374ad5a5bfdf226bd6 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b4/84357be80aac412aa1f7ccb9e45290a22303d8 Binary file BSseeker2/.git/objects/b4/84357be80aac412aa1f7ccb9e45290a22303d8 has changed diff -r e6df770c0e58 -r 8b26adf64adc 
BSseeker2/.git/objects/b4/cbc789c9cded56c331d9c262444d0cbe58e3ff Binary file BSseeker2/.git/objects/b4/cbc789c9cded56c331d9c262444d0cbe58e3ff has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b5/758231ef4f42621409676e9e6450a3a5885e5b Binary file BSseeker2/.git/objects/b5/758231ef4f42621409676e9e6450a3a5885e5b has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b6/26f4a52659a967b6132ad32a2e9071e6a1b004 Binary file BSseeker2/.git/objects/b6/26f4a52659a967b6132ad32a2e9071e6a1b004 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b7/171c2f1b920b16ac41198a6b0af1dc2c2c9e78 Binary file BSseeker2/.git/objects/b7/171c2f1b920b16ac41198a6b0af1dc2c2c9e78 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b7/1b2be06f002551f619fca2db129a4284a32cd0 Binary file BSseeker2/.git/objects/b7/1b2be06f002551f619fca2db129a4284a32cd0 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b7/7b6e04c2f0d6cf0ff5662b8150b4cccacfeaf4 Binary file BSseeker2/.git/objects/b7/7b6e04c2f0d6cf0ff5662b8150b4cccacfeaf4 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b8/cf58db75eaf9a015a04eb734d756b1fb6c9df1 Binary file BSseeker2/.git/objects/b8/cf58db75eaf9a015a04eb734d756b1fb6c9df1 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b9/1997bb3d05bfaa2d85ca71ac327cd03f4fb644 Binary file BSseeker2/.git/objects/b9/1997bb3d05bfaa2d85ca71ac327cd03f4fb644 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/b9/8d98f275842e593aea9330a37de9e0fbcae1d5 Binary file BSseeker2/.git/objects/b9/8d98f275842e593aea9330a37de9e0fbcae1d5 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/ba/0a63e17eb6f573cccbbd1d33cf149a3a66f306 Binary file BSseeker2/.git/objects/ba/0a63e17eb6f573cccbbd1d33cf149a3a66f306 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/bb/9c3276c3158ae9eda8a517bb7f5f2561eafe1c Binary file BSseeker2/.git/objects/bb/9c3276c3158ae9eda8a517bb7f5f2561eafe1c has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/bc/1f424a3b8c4947e3bf03cd2af76a3c1446144e Binary file BSseeker2/.git/objects/bc/1f424a3b8c4947e3bf03cd2af76a3c1446144e has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/bc/a097526f847079098d7dfa48905c6a446b721c Binary file BSseeker2/.git/objects/bc/a097526f847079098d7dfa48905c6a446b721c has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/be/1d4b99985233fa05ce96c76feba716fc040686 Binary file BSseeker2/.git/objects/be/1d4b99985233fa05ce96c76feba716fc040686 has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/be/f543ff7089d4909c2386d8ceed3cae32e7a7fd Binary file BSseeker2/.git/objects/be/f543ff7089d4909c2386d8ceed3cae32e7a7fd has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.git/objects/bf/0578826f7cc94414b5132218481f4b3b97821a --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.git/objects/bf/0578826f7cc94414b5132218481f4b3b97821a Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,1 @@ +xTk0kWYm"q V++t,=!(҈ږIx[&H}ݧ׵ZCqVVƁuF'6՝`!(8ڨ{a-J5ƨ5zKD7,mGNX1~NvL<JMw0+ܪΦ7$?'k'Ɇx^U+,Ro> NcfJXWPF$7[p7% nmƎёS< + + + + + + + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.idea/encodings.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/encodings.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,5 @@ + + + + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.idea/misc.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/BSseeker2/.idea/misc.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,5 @@ + + + + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.idea/modules.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/modules.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,9 @@ + + + + + + + + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.idea/other.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/other.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,7 @@ + + + + + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.idea/scopes/scope_settings.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/scopes/scope_settings.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.idea/testrunner.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/testrunner.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,8 @@ + + + + + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.idea/vcs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/vcs.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,7 @@ + + + + + + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/.idea/workspace.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/.idea/workspace.xml Tue Nov 05 01:55:39 2013 -0500 @@ -0,0 +1,644 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1365490852338 + 1365490852338 + + + 1365495166132 + 1365495166132 + + + 1366217039909 + 1366217039909 + + + 1366521472272 + 1366521472272 + + + 1368857624039 + 1368857624039 + + + 1368872343967 + 1368872343967 + + + 1368872394831 + 1368872394831 + + + 1369030443105 + 1369030443105 + + + 1369355034039 + 1369355034040 + + + 1369785459146 + 1369785459146 + + + 1369876356622 + 1369876356622 + + + 1370044685987 + 1370044685987 + + + 1371202921858 + 1371202921858 + + + 1371366774531 + 1371366774531 + + + 1375307754641 + 1375307754641 + + + 1383633777408 + 1383633777408 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/AUTHORS diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/FilterReads.py --- a/BSseeker2/FilterReads.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/FilterReads.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import sys import os @@ -226,7 +226,7 @@ (options, args) = parser.parse_args() if (options.infile is None) or (options.outfile is None) : - print parser.print_help() + parser.print_help() exit(-1) FilterReads(options.infile, options.outfile, options.keepquality) diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/README.md --- a/BSseeker2/README.md Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/README.md Tue Nov 05 01:55:39 2013 -0500 @@ -48,9 +48,10 @@ * Linux or Mac OS platform * One of the following Aligner - - [bowtie](http://bowtie-bio.sourceforge.net/) - - 
[bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/) (Recommend) + - [bowtie](http://bowtie-bio.sourceforge.net/) (fast) + - [bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/) (Default) - [soap](http://soap.genomics.org.cn/) + - [rmap](http://www.cmb.usc.edu/people/andrewds/rmap/) * [Python](http://www.python.org/download/) (Version 2.6 +) (It is normally pre-installed in Linux. Type " python -V" to see the installed version.) @@ -74,7 +75,7 @@ $ python FilterReads.py Usage: FilterReads.py -i -o [-k] - Author : Guo, Weilong; guoweilong@gmail.com; 2012-11-10 + Author : Guo, Weilong; 2012-11-10 Unique reads for qseq/fastq/fasta/sequencce, and filter low quality file in qseq file. @@ -105,16 +106,16 @@ -h, --help show this help message and exit -f FILE, --file=FILE Input your reference genome file (fasta) --aligner=ALIGNER Aligner program to perform the analysis: bowtie, - bowtie2, soap [Default: bowtie2] - -p PATH, --path=PATH Path to the aligner program. Defaults: - bowtie: /u/home/mcdb/weilong/install/bowtie-0.12.8 - bowtie2: - /u/home/mcdb/weilong/install/bowtie2-2.0.0-beta7 - soap: /u/home/mcdb/weilong/install/soap2.21release/ + bowtie2, soap, rmap [Default: bowtie] + -p PATH, --path=PATH Path to the aligner program. Detected: + bowtie: ~/install/bowtie + bowtie2: ~/install/bowtie2 + rmap: ~/install/rmap_/bin + soap: ~/install/soap/ -d DBPATH, --db=DBPATH Path to the reference genome library (generated in - preprocessing genome) [Default: /u/home/mcdb/weilong/i - nstall/BSseeker2/bs_utils/reference_genomes] + preprocessing genome) [Default: ~/install + /BSseeker2/bs_utils/reference_genomes] -v, --version show version of BS-Seeker2 Reduced Representation Bisulfite Sequencing Options: @@ -125,7 +126,7 @@ certain fragments will be masked. [Default: False] -l LOW_BOUND, --low=LOW_BOUND lower bound of fragment length (excluding recognition - sequence such as C-CGG) [Default: 40] + sequence such as C-CGG) [Default: 20] -u UP_BOUND, --up=UP_BOUND upper bound of fragment length (excluding recognition sequence such as C-CGG ends) [Default: 500] @@ -134,6 +135,7 @@ Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: C-CGG] + ##Example * Build genome index for WGBS using bowtie, path of bowtie should be included in $PATH @@ -150,7 +152,7 @@ * Build genome index for RRBS library for double-enzyme : MspI (C-CGG) & ApeKI (G-CWGC, where W=A|T, see [IUPAC code](http://www.bioinformatics.org/sms/iupac.html)) - python bs_seeker2-build.py -f genome.fa -r -c C-CGG,G-CWGC + python bs_seeker2-build.py -f genome.fa -r -c C-CGG,G-CWGC --aligner=bowtie ##Tips: @@ -172,75 +174,73 @@ ##Usage : $ python ~/install/BSseeker2/bs_seeker2-align.py -h - Usage: bs_seeker2-align.py [options] + Usage: bs_seeker2-align.py {-i | -1 -2 } -g [options] Options: -h, --help show this help message and exit For single end reads: -i INFILE, --input=INFILE - Input your read file name (FORMAT: sequences, - fastq, qseq,fasta) + Input read file (FORMAT: sequences, qseq, fasta, + fastq). 
Ex: read.fa or read.fa.gz For pair end reads: -1 FILE, --input_1=FILE - Input your read file end 1 (FORMAT: sequences, - qseq, fasta, fastq) + Input read file, mate 1 (FORMAT: sequences, qseq, + fasta, fastq) -2 FILE, --input_2=FILE - Input your read file end 2 (FORMAT: sequences, - qseq, fasta, fastq) - --minins=MIN_INSERT_SIZE + Input read file, mate 2 (FORMAT: sequences, qseq, + fasta, fastq) + -I MIN_INSERT_SIZE, --minins=MIN_INSERT_SIZE The minimum insert size for valid paired-end - alignments [Default: -1] - --maxins=MAX_INSERT_SIZE + alignments [Default: 0] + -X MAX_INSERT_SIZE, --maxins=MAX_INSERT_SIZE The maximum insert size for valid paired-end - alignments [Default: 400] + alignments [Default: 500] Reduced Representation Bisulfite Sequencing Options: - -r, --rrbs Process reads from Reduced Representation Bisulfite - Sequencing experiments + -r, --rrbs Map reads to the Reduced Representation genome -c pattern, --cut-site=pattern Cutting sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). + [Default: C-CGG] -L RRBS_LOW_BOUND, --low=RRBS_LOW_BOUND - lower bound of fragment length (excluding C-CGG ends) - [Default: 40] + Lower bound of fragment length (excluding C-CGG ends) + [Default: 20] -U RRBS_UP_BOUND, --up=RRBS_UP_BOUND - upper bound of fragment length (excluding C-CGG ends) + Upper bound of fragment length (excluding C-CGG ends) [Default: 500] General options: -t TAG, --tag=TAG [Y]es for undirectional lib, [N]o for directional [Default: N] -s CUTNUMBER1, --start_base=CUTNUMBER1 - The first base of your read to be mapped [Default: 1] + The first cycle of the read to be mapped [Default: 1] -e CUTNUMBER2, --end_base=CUTNUMBER2 - The last cycle number of your read to be mapped - [Default: 200] + The last cycle of the read to be mapped [Default: 200] -a FILE, --adapter=FILE Input text file of your adaptor sequences (to be - trimed from the 3'end of the reads). Input 1 seq for - dir. lib., 2 seqs for undir. lib. One line per - sequence + trimmed from the 3'end of the reads, ). Input one seq + for dir. lib., twon seqs for undir. lib. One line per + sequence. Only the first 10bp will be used --am=ADAPTER_MISMATCH - Number of mismatches allowed in adaptor [Default: 1] + Number of mismatches allowed in adapter [Default: 0] -g GENOME, --genome=GENOME - Name of the reference genome (the same as the - reference genome file in the preprocessing step) [ex. - chr21_hg18.fa] - -m INT_NO_MISMATCHES, --mismatches=INT_NO_MISMATCHES + Name of the reference genome (should be the same as + "-f" in bs_seeker2-build.py ) [ex. chr21_hg18.fa] + -m NO_MISMATCHES, --mismatches=NO_MISMATCHES Number of mismatches in one read [Default: 4] - --aligner=ALIGNER Aligner program to perform the analisys: bowtie, - bowtie2, soap [Default: bowtie2] + --aligner=ALIGNER Aligner program for short reads mapping: bowtie, + bowtie2, soap, rmap [Default: bowtie] -p PATH, --path=PATH - Path to the aligner program. Defaults: - bowtie: /u/home/mcdb/weilong/install/bowtie-0.12.8 - bowtie2: - /u/home/mcdb/weilong/install/bowtie2-2.0.0-beta7 - soap: /u/home/mcdb/weilong/soap2.21release/ + Path to the aligner program. 
Detected: + bowtie: ~/install/bowtie + bowtie2: ~/install/bowtie2 + rmap: ~/install/rmap/bin + soap: ~/install/soap/ -d DBPATH, --db=DBPATH Path to the reference genome library (generated in - preprocessing genome) [Default: /u/home/mcdb/weilong/i + preprocessing genome) [Default: ~/i nstall/BSseeker2/bs_utils/reference_genomes] -l NO_SPLIT, --split_line=NO_SPLIT Number of lines per split (the read file will be split @@ -251,7 +251,7 @@ -f FORMAT, --output-format=FORMAT Output format: bam, sam, bs_seeker1 [Default: bam] --no-header Suppress SAM header lines [Default: False] - --temp_dir=PATH The path to your temporary directory [Default: /tmp] + --temp_dir=PATH The path to your temporary directory [Detected: /tmp] --XS=XS_FILTER Filter definition for tag XS, format X,Y. X=0.8 and y=5 indicate that for one read, if #(mCH sites)/#(all CH sites)>0.8 and #(mCH sites)>5, then tag XS=1; or @@ -263,31 +263,51 @@ Aligner Options: You may specify any additional options for the aligner. You just have to prefix them with --bt- for bowtie, --bt2- for bowtie2, --soap- for - soap, and BS Seeker will pass them on. For example: --bt-p 4 will - increase the number of threads for bowtie to 4, --bt--tryhard will - instruct bowtie to try as hard as possible to find valid alignments - when they exist, and so on. Be sure that you know what you are doing - when using these options! Also, we don't do any validation on the - values. - + soap, --rmap- for rmap, and BS Seeker will pass them on. For example: + --bt-p 4 will increase the number of threads for bowtie to 4, --bt-- + tryhard will instruct bowtie to try as hard as possible to find valid + alignments when they exist, and so on. Be sure that you know what you + are doing when using these options! Also, we don't do any validation + on the values. 
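(Not part of the original README: an illustrative sketch of the --XS filter described in the options above. The name xs_tag and the example strings are hypothetical; the counting mirrors what this changeset does in bs_align/bs_pair_end.py, where 'y'/'z' are tallied as unmethylated CH sites and 'Y'/'Z' as methylated CH sites.)

    # Hypothetical helper mirroring the patch's XS computation: a read is
    # tagged XS:i:1 when it has more than xs_count methylated CH sites and
    # their fraction among all CH sites exceeds xs_pct (e.g. X=0.8, Y=5).
    def xs_tag(methy, xs_pct=0.8, xs_count=5):
        n_ch  = methy.count('y') + methy.count('z')   # unmethylated CH sites
        n_mch = methy.count('Y') + methy.count('Z')   # methylated CH sites
        if n_mch > xs_count and n_mch / float(n_ch + n_mch) > xs_pct:
            return 1
        return 0

    print(xs_tag("YYYZZZZy"))   # 7 of 8 CH sites methylated -> 1 (likely unconverted)
    print(xs_tag("YYZzzzyy"))   # only 3 methylated CH sites  -> 0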
##Examples : -* Align from fasta format with bowtie2 (local alignment) for whole genome, allowing 3 mismatches +* WGBS library ; alignment mode, bowtie ; map to WGBS index + + python bs_seeker2-align.py -i WGBS.fa --aligner=bowtie -o WGBS.bam -f bam -g genome.fa + +* WGBS library ; alignment mode, bowtie2-local ; map to WGBS index - python bs_seeker2-align.py -i WGBS.fa -m 3 --aligner=bowtie2 -o WGBS.bam -f bam -g genome.fa + python bs_seeker2-align.py -i WGBS.fa --aligner=bowtie2 -o WGBS.bam -f bam -g genome.fa + +* WGBS library ; alignment mode, bowtie2-end-to-end ; map to WGBS index -* Align from qseq format for RRBS with bowtie, default parameters for RRBS fragments + python bs_seeker2-align.py -i WGBS.fa -m 3 --aligner=bowtie2 -o WGBS.bam -f bam -g genome.fa --bt2--end-to-end + +* RRBS library ; alignment mode, bowtie ; map to RR index - python bs_seeker2-align.py -i RRBS.fa --aligner=bowtie -o RRBS.sam -f sam -g genome.fa -r -a adapter.txt + python bs_seeker2-align.py -i RRBS.fa --aligner=bowtie -o RRBS.bam -g genome.fa -r -a adapter.txt + +* RRBS library ; alignment mode, bowtie ; map to WG index + + python bs_seeker2-align.py -i RRBS.fa --aligner=bowtie -o RRBS.bam -g genome.fa -a adapter.txt -* Align from qseq format for RRBS with bowtie (end-to-end), specifying lengths of fragments ranging [40bp, 400bp] +* RRBS library ; alignment mode, bowtie2-end-to-end ; map to WG index + + python bs_seeker2-align.py -i RRBS.fa --aligner=bowtie -o RRBS.bam -g genome.fa -a adapter.txt --bt2--end-to-end - python bs_seeker2-align.py -i RRBS.qseq --aligner=bowtie2 --bt2--end-to-end -o RRBS.bam -f bam -g genome.fa -r --low=40 --up=400 -a adapter.txt +* Align from qseq format for RRBS with bowtie, specifying lengths of fragments ranging [40bp, 400bp] + + python bs_seeker2-align.py -i RRBS.qseq --aligner=bowtie -o RRBS.bam -f bam -g genome.fa -r --low=40 --up=400 -a adapter.txt The parameters '--low' and '--up' should be the same with corresponding parameters when building the genome index +* WGBS library ; alignment mode, bowtie ; map to WGBS index; use 8 threads for alignment + + python bs_seeker2-align.py -i WGBS.fa --aligner=bowtie -o WGBS.bam -f bam -g genome.fa --bt-p 4 + +BS-Seeker2 will run TWO bowtie instances in parallel. ##Input file: @@ -341,6 +361,10 @@ For example, if 95% of reads come from fragments with length range [50bp, 250bp], you'd better choose [40bp, 300bp]. +- Fewer mismatches for the 'local alignment' mode. + + As the 'local alignment', the bad sequenced bases are usually trimmed, and would not be considered by the parameter "-m". + It is suggested to user fewer mismatches for the 'local alignment' mode. 
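(Not part of the original README: a minimal sketch of how the '-m' cutoff is interpreted per read. An integer value is an absolute mismatch count, while a value below 1, e.g. '-m 0.05', is read as a fraction of the aligned read length, as introduced by this changeset. passes_mismatch_filter is a hypothetical name; the condition mirrors the check this patch adds to bs_pair_end.py and bs_rrbs.py.)

    # Hypothetical helper illustrating the -m filter added in this changeset:
    # max_mismatch >= 1 is an absolute count; max_mismatch < 1 is a fraction
    # of the read (alignment) length.
    def passes_mismatch_filter(n_mismatch, read_len, max_mismatch):
        mm = float(max_mismatch)
        if mm >= 1:
            return n_mismatch <= mm
        return n_mismatch <= mm * read_len

    print(passes_mismatch_filter(4, 100, "5"))      # True:  4 <= 5
    print(passes_mismatch_filter(4, 100, "0.05"))   # True:  4 <= 0.05 * 100
    print(passes_mismatch_filter(6, 100, "0.05"))   # False: 6 > 5.0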
(3) bs_seeker2-call_methylation.py ------------ @@ -351,16 +375,14 @@ ##Usage: $ python bs_seeker2-call_methylation.py -h - Usage: bs_seeker2-call_methylation.py [options] - Options: -h, --help show this help message and exit -i INFILE, --input=INFILE BAM output from bs_seeker2-align.py -d DBPATH, --db=DBPATH Path to the reference genome library (generated in - preprocessing genome) [Default: /u/home/mcdb/weilong/i - nstall/BSseeker2/bs_utils/reference_genomes] + preprocessing genome) [Default: ~/install + /BSseeker2/bs_utils/reference_genomes] -o OUTFILE, --output-prefix=OUTFILE The output prefix to create ATCGmap and wiggle files [INFILE] @@ -370,6 +392,7 @@ -x, --rm-SX Removed reads with tag 'XS:i:1', which would be considered as not fully converted by bisulfite treatment [Default: False] + --txt Show CGmap and ATCGmap in .gz [Default: False] -r READ_NO, --read-no=READ_NO The least number of reads covering one site to be shown in wig file [Default: 1] @@ -430,9 +453,9 @@ (3) position (4) context (CG/CHG/CHH) (5) dinucleotide-context (CA/CC/CG/CT) - (6) methyltion-level = #-of-C / (#-of-C + #-of-T) - (7) #-of-C (methylated) - (8) (#-ofC + #-of-T) (all cytosines) + (6) methylation-level = #_of_C / (#_of_C + #_of_T). + (7) #_of_C (methylated C, the count of reads showing C here) + (8) = #_of_C + #_of_T (all Cytosines, the count of reads showing C or T here) - ATCGmap file @@ -442,7 +465,7 @@ chr1 T 3009410 -- -- 0 10 0 0 0 0 0 0 0 0 na chr1 C 3009411 CHH CC 0 10 0 0 0 0 0 0 0 0 0.0 chr1 C 3009412 CHG CC 0 10 0 0 0 0 0 0 0 0 0.0 - chr1 C 3009413 CG CG 0 10 50 0 0 0 0 0 0 0 0.833333333333 + chr1 C 3009413 CG CG 0 10 50 0 0 0 0 0 0 0 0.83 Format descriptions: @@ -467,14 +490,15 @@ (14) # of reads from Crick strand mapped here, support G on Watson strand and C on Crick strand (15) # of reads from Crick strand mapped here, support N - (16) methylation_level = #C/(#C+#T) = (C8+C14)/(C7+C8+C11+C14); "nan" means none reads support C/T at this position. + (16) methylation_level = #C/(#C+#T) = C8/(C7+C8) for Watson strand, =C14/(C11+C14) for Crick strand; + "nan" means none reads support C/T at this position. Contact Information ============ -If you still have questions on BS-Seeker 2, or you find bugs when using BS-Seeker 2, or you have suggestions, please write email to guoweilong@gmail.com (Weilong Guo). +If you still have questions on BS-Seeker 2, or you find bugs when using BS-Seeker 2, or you have suggestions, please write email to [Weilong Guo](guoweilong@gmail.com). @@ -556,7 +580,7 @@ cd BSseeker2-master/ -(4)Run BS-Seeker2 +(4)Run BS-Seeker2 Q: Can I add the path of BS-Seeker2's *.py to the $PATH, so I can call BS-Seeker2 from anywhere? @@ -585,6 +609,69 @@ bs_seeker-align.py -h bs_seeker-call_methylation.py -h +(5) Unique alignment + +Q: If I want to only keep alignments that map uniquely, is this an argument I should supply directly +to Bowtie2 (via BS Seeker 2's command line option), or is this an option that's available in +BS Seeker 2 itself? + +A: BS-Seeker2 reports unique alignment by default already. If you want to know how many reads +could have multiple hits, run BS-Seeker2 with parameter "--multiple-hit". + + +(6) Output + +Q: In CGmap files, why some lines shown "--" but not a motif (CG/CHG/CHH), for example: + + chr01 C 4303711 -- CC 0.0 0 2 + chr01 C 4303712 -- CN 0.0 0 2 + +A: In this example, the site 4303713 is "N" in genome, thus we could not decide the explict pattern. + +(7) Algorithm to remove the adapter. 
+ +Q: What's the algorithm to remove the adapter + +A: BS-Seeker2 has built-in algorithm for removing the adapter, which is developed by [Weilong Guo](http://bioinfo.au.tsinghua.edu.cn/member/wguo/index.html). + + First, if the adapter was provided as "AGATCGGAAGAGCACACGTC", only the first 10bp would be used. + Second, we use semi-local alignment strategy for removing the adapters. + Exmaple: + + Read : ACCGCGTTGATCGAGTACGTACGTGGGTC + Adapter : ....................ACGTGGGTCCCG + + no_mismatch : the maximum number allowed for mismatches + + Algorithm: (allowing 1 mismatch) + -Step 1: + ACCGCGTTGATCGAGTACGTACGTGGGTC + ||XX + ACGTGGGTCCCG + -Step 2: + ACCGCGTTGATCGAGTACGTACGTGGGTC + X||X + .ACGTGGGTCCCG + -Step 3: + ACCGCGTTGATCGAGTACGTACGTGGGTC + XX + ..ACGTGGGTCCCG + -Step ... + -Step N: + ACCGCGTTGATCGAGTACGTACGTGGGTC + ||||||||| + ....................ACGTGGGTCCCG + Success & return! + + Third, we also removed the synthesized bases at the end of RRBS fragments. + Take the "C-CGG" cutting site as example, + + - - C|U G G - - =>cut=> - - C =>add=> - - C|C G =>sequencing + - - G G C|C - - - - G G C - - G G C + + In our algorithm, the "CG" in "--CCG" (upper strand) was trimmed, in order to get accurate methyaltion level. + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/RELEASE_NOTES --- a/BSseeker2/RELEASE_NOTES Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/RELEASE_NOTES Tue Nov 05 01:55:39 2013 -0500 @@ -3,5 +3,25 @@ 1. Fix bug in utils.py 2. Add UPDATE file +v2.0.4 - July 29, 2013 +1. Update README.md (Definition of ATCGmap format) +2. Fix the "None" output when no parameter +3. Fix error report when using wrong path for short reads aligner +4. MIT License + +V2.0.5 - Nov 31, 2013 +1. Fix bug in "bs_single_end.py" for "numbers_premapped_lst[0] += len(Unique_FW_C2T)" +2. Make the default aligner to Bowtie +3. Update the README file +4. "-r N" in call-methylation will only control Wiggle file +5. Change the output of pair-end mapping to the standard format of SAM file (the QNAME and FLAG fields) +6. The artificially synthesized bases at the ends of RRBS fragments are removed when removing the adapters +7. Add the total number of bases of uniquely mapped reads to the STDOUT +8. Support "end-to-end" alignment mode in the Galaxy version +9. Support input file in both TEXT or Compressed (.gz) format +10. "-m" allow both INT and FLOAT as input. Ex: '-m 5' or '-m 0.05' +11. 
Add mode information in output + + diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/.___init__.py Binary file BSseeker2/bs_align/.___init__.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/._bs_align_utils.py Binary file BSseeker2/bs_align/._bs_align_utils.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/._bs_pair_end.py Binary file BSseeker2/bs_align/._bs_pair_end.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/._bs_rrbs.py Binary file BSseeker2/bs_align/._bs_rrbs.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/._bs_single_end.py Binary file BSseeker2/bs_align/._bs_single_end.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/._output.py Binary file BSseeker2/bs_align/._output.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/bs_align_utils.py --- a/BSseeker2/bs_align/bs_align_utils.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/bs_align_utils.py Tue Nov 05 01:55:39 2013 -0500 @@ -54,9 +54,15 @@ """ -def RemoveAdapter ( read, adapter, no_mismatch ) : +# Remove the adapter from 3' end +def RemoveAdapter ( read, adapter, no_mismatch, rm_back=0) : lr = len(read) la = len(adapter) + # Check the empty adapter, namely, the reads start with the 2nd base of adapter, + # not including the 'A' base in front of the adapter. + if adapter[2:] == read[0:(la-1)] : + return "" + for i in xrange( lr - no_mismatch ) : read_pos = i adapter_pos = 0 @@ -74,8 +80,14 @@ adapter_pos = adapter_pos + 1 # while_end + # Cut the extra bases before the adapter + # --C|CG G-- => --CNN+A+ + # --G GC|C-- --GGC if adapter_pos == la or read_pos == lr : - return read[:i] + if i <= rm_back : + return '' + else : + return read[:(i-rm_back)] # for_end return read diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/bs_pair_end.py --- a/BSseeker2/bs_align/bs_pair_end.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/bs_pair_end.py Tue Nov 05 01:55:39 2013 -0500 @@ -104,13 +104,13 @@ aligner_command, db_path, tmp_path, - outfile, XS_pct, XS_count, show_multiple_hit=False): + outfile, XS_pct, XS_count, adapter_mismatch, show_multiple_hit=False): #---------------------------------------------------------------- - adapter="" - adapterA="" - adapterB="" + adapter = "" + adapterA = "" + adapterB = "" if adapter_file !="": try : adapter_inf=open(adapter_file,"r") @@ -134,12 +134,15 @@ logm("End 1 filename: %s"% main_read_file_1 ) logm("End 2 filename: %s"% main_read_file_2 ) logm("The first base (for mapping): %d"% cut1 ) - logm("The last base (for mapping): %d"% cut2 ) + logm("The last base (for mapping): %d"% cut2 ) - logm("-------------------------------- " ) - logm("Un-directional library: %s" % asktag ) logm("Path for short reads aligner: %s"% aligner_command + '\n') logm("Reference genome library path: %s"% db_path ) + + if asktag == "Y" : + logm("Un-directional library" ) + else : + logm("Directional library") logm("Number of mismatches allowed: %s"% max_mismatch_no ) if adapter_file !="": @@ -182,6 +185,9 @@ all_trimmed=0 all_mapped=0 all_mapped_passed=0 + all_base_before_trim=0 + all_base_after_trim=0 + all_base_mapped=0 all_unmapped=0 @@ -192,6 +198,8 @@ uC_lst=[0,0,0] no_my_files=0 + read_id_lst_1 = dict() + read_id_lst_2 = dict() #---------------------------------------------------------------- print "== Start mapping ==" @@ -212,11 +220,14 @@ #---------------------------------------------------------------- outfile_1FCT = tmp_d('Trimmed_FCT_1.fa'+random_id) 
outfile_1RCT = tmp_d('Trimmed_RCT_1.fa'+random_id) - outfile_2FCT = tmp_d('Trimmed_FCT_2.fa'+random_id) + outfile_2RGA = tmp_d('Trimmed_FCT_2.fa'+random_id) outfile_2RCT = tmp_d('Trimmed_RCT_2.fa'+random_id) try : - read_inf = open(tmp_d(read_file_1),"r") + if read_file_1.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(tmp_d(read_file_1), "rb") + else : + read_inf = open(tmp_d(read_file_1),"r") except IOError : print "[Error] Cannot open file : %s", tmp_d(read_file_1) exit(-1) @@ -226,128 +237,131 @@ if oneline[0]=="@": # Illumina GAII FastQ (Lister et al Nature 2009) input_format="fastq" - n_fastq=0 elif len(l)==1 and oneline[0]!=">": # pure sequences input_format="seq" elif len(l)==11: # Illumina GAII qseq file input_format="qseq" elif oneline[0]==">": # fasta input_format="fasta" - n_fasta=0 read_inf.close() print "Detected data format: %s" % input_format #---------------------------------------------------------------- read_file_list = [read_file_1, read_file_2] - outfile_FCT_list = [outfile_1FCT, outfile_2FCT] + outfile_FCT_list = [outfile_1FCT, outfile_2RGA] outfile_RCT_list = [outfile_1RCT, outfile_2RCT] n_list = [0, 0] - - for f in range(2): read_file = read_file_list[f] outf_FCT = open(outfile_FCT_list[f], 'w') - outf_RCT = open(outfile_RCT_list[f], 'w') + outf_RGA = open(outfile_RCT_list[f], 'w') original_bs_reads = original_bs_reads_lst[f] n = n_list[f] - - seq_id = "" + read_id = "" seq = "" seq_ready = "N" + line_no = 0 for line in fileinput.input(tmp_d(read_file)): - l=line.split() + l = line.split() + line_no += 1 if input_format=="seq": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[0] - seq_ready="Y" + n += 1 + read_id = str(n) + read_id = read_id.zfill(12) + if f == 1 : + read_id_lst_1[read_id] = read_id + else : + read_id_lst_2[read_id] = read_id + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq==1 : + all_raw_reads += 1 + read_id = str(line_no/4+1).zfill(12) + if f==1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq_ready="N" + elif l_fastq==2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[8] - seq_ready="Y" + all_raw_reads += 1 + read_id = str(line_no) + read_id = read_id.zfill(12) + if f == 1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq = l[8] + seq_ready = "Y" elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - n+=1 - seq_id=l[0][1:] - seq_id=seq_id.zfill(17) - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fasta = math.fmod(line_no,2) + seq_ready = "N" + if l_fasta==1: + all_raw_reads += 1 + read_id = str(line_no/2+1).zfill(12) + if f==1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq = "" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" #---------------------------------------------------------------- if seq_ready=="Y": - seq=seq[cut1-1:cut2] #------- selecting 0..52 from 1..72 -e 52 - seq=seq.upper() - seq=seq.replace(".","N") + seq = seq[cut1-1:cut2] #------- selecting 0..52 from 1..72 -e 52 + seq = seq.upper() + seq = seq.replace(".","N") #--striping BS adapter 
from 3' read -------------------------------------------------------------- - if (adapterA !="") and (adapterB !=""): - signature=adapterA[:6] - if signature in seq: - signature_pos=seq.index(signature) - if seq[signature_pos:] in adapterA: - seq=seq[:signature_pos]#+"".join(["N" for x in range(len(seq)-len(signature_pos))]) - all_trimmed+=1 - else: - signature=adapterB[:6] - if signature in seq: - #print seq_id,seq,signature; - signature_pos=seq.index(signature) - if seq[signature_pos:] in adapterB: - seq=seq[:signature_pos]#+"".join(["N" for x in range(len(seq)-len(signature_pos))]) - all_trimmed+=1 + all_base_before_trim += len(seq) + if f==0 and adapterA!="" : + new_read = RemoveAdapter(seq, adapterA, adapter_mismatch) + if len(new_read)%s\n%s\n' % (seq_id, seq.replace("C","T"))) + outf_FCT.write('>%s\n%s\n' % (read_id, seq.replace("C","T"))) #--------- RC_G2A ------------------ - outf_RCT.write('>%s\n%s\n' % (seq_id, seq.replace("G","A"))) + outf_RGA.write('>%s\n%s\n' % (read_id, seq.replace("G","A"))) - n_list[f]=n + n_list[f] = n outf_FCT.close() - outf_RCT.close() + outf_RGA.close() fileinput.close() - #print "All input end 1: %d , end 2: %d "%(n_list[0],n_list[1]); - all_raw_reads+=n + all_raw_reads += n #-------------------------------------------------------------------------------- # Bowtie mapping #-------------------------------------------------------------------------------- - WC2T_fr=tmp_d("W_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) - WC2T_rf=tmp_d("W_C2T_rf_m"+max_mismatch_no+".mapping"+random_id) - CC2T_fr=tmp_d("C_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) - CC2T_rf=tmp_d("C_C2T_rf_m"+max_mismatch_no+".mapping"+random_id) + WC2T_fr = tmp_d("W_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) + WC2T_rf = tmp_d("W_C2T_rf_m"+max_mismatch_no+".mapping"+random_id) + CG2A_fr = tmp_d("C_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) + CC2T_rf = tmp_d("C_C2T_rf_m"+max_mismatch_no+".mapping"+random_id) run_in_parallel([aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'), 'input_file_1' : outfile_1FCT, @@ -357,64 +371,62 @@ aligner_command % {'reference_genome' : os.path.join(db_path,'C_C2T'), 'input_file_1' : outfile_1FCT, 'input_file_2' : outfile_2RCT, - 'output_file' : CC2T_fr}, + 'output_file' : CG2A_fr}, aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'), - 'input_file_1' : outfile_2FCT, + 'input_file_1' : outfile_2RGA, 'input_file_2' : outfile_1RCT, 'output_file' : WC2T_rf}, aligner_command % {'reference_genome' : os.path.join(db_path,'C_C2T'), - 'input_file_1' : outfile_2FCT, + 'input_file_1' : outfile_2RGA, 'input_file_2' : outfile_1RCT, 'output_file' : CC2T_rf}]) - - delete_files(outfile_1FCT, outfile_2FCT, outfile_1RCT, outfile_2RCT) + delete_files(outfile_1FCT, outfile_2RGA, outfile_1RCT, outfile_2RCT) #-------------------------------------------------------------------------------- # Post processing #-------------------------------------------------------------------------------- - FW_C2T_fr_U, FW_C2T_fr_R = extract_mapping(WC2T_fr) FW_C2T_rf_U, FW_C2T_rf_R = extract_mapping(WC2T_rf) - RC_C2T_fr_U, RC_C2T_fr_R = extract_mapping(CC2T_fr) + RC_G2A_fr_U, RC_G2A_fr_R = extract_mapping(CG2A_fr) RC_C2T_rf_U, RC_C2T_rf_R = extract_mapping(CC2T_rf) - delete_files(WC2T_fr, WC2T_rf, CC2T_fr, CC2T_rf) + delete_files(WC2T_fr, WC2T_rf, CG2A_fr, CC2T_rf) #---------------------------------------------------------------- # get uniq-hit reads #---------------------------------------------------------------- - 
Union_set=set(FW_C2T_fr_U.iterkeys()) | set(FW_C2T_rf_U.iterkeys()) | set(RC_C2T_fr_U.iterkeys()) | set(RC_C2T_rf_U.iterkeys()) + Union_set = set(FW_C2T_fr_U.iterkeys()) | set(FW_C2T_rf_U.iterkeys()) | set(RC_G2A_fr_U.iterkeys()) | set(RC_C2T_rf_U.iterkeys()) - Unique_FW_fr_C2T=set() # + - Unique_FW_rf_C2T=set() # + - Unique_RC_fr_C2T=set() # - - Unique_RC_rf_C2T=set() # - - Multiple_hits=set() + Unique_FW_fr_C2T = set() # + + Unique_FW_rf_C2T = set() # + + Unique_RC_fr_G2A = set() # - + Unique_RC_rf_C2T = set() # - + Multiple_hits = set() for x in Union_set: - _list=[] - for d in [FW_C2T_fr_U, FW_C2T_rf_U, RC_C2T_fr_U, RC_C2T_rf_U]: - mis_lst=d.get(x,[99]) - mis=int(mis_lst[0]) + _list = [] + for d in [FW_C2T_fr_U, FW_C2T_rf_U, RC_G2A_fr_U, RC_C2T_rf_U]: + mis_lst = d.get(x,[99]) + mis = int(mis_lst[0]) _list.append(mis) - for d in [FW_C2T_fr_R, FW_C2T_rf_R, RC_C2T_fr_R, RC_C2T_rf_R]: - mis=d.get(x,99) + for d in [FW_C2T_fr_R, FW_C2T_rf_R, RC_G2A_fr_R, RC_C2T_rf_R]: + mis = d.get(x,99) _list.append(mis) - mini=min(_list) + mini = min(_list) if _list.count(mini)==1: - mini_index=_list.index(mini) + mini_index = _list.index(mini) if mini_index==0: Unique_FW_fr_C2T.add(x) elif mini_index==1: Unique_FW_rf_C2T.add(x) elif mini_index==2: - Unique_RC_fr_C2T.add(x) + Unique_RC_fr_G2A.add(x) elif mini_index==3: Unique_RC_rf_C2T.add(x) else : @@ -424,7 +436,7 @@ # write reads rejected by Multiple Hits to file if show_multiple_hit : - outf_MH=open("Multiple_hit.fa",'w') + outf_MH = open("Multiple_hit.fa",'w') for i in Multiple_hits : outf_MH.write(">%s\n" % i) outf_MH.write("%s\n" % original_bs_reads[i]) @@ -433,32 +445,32 @@ del Union_set del FW_C2T_fr_R del FW_C2T_rf_R - del RC_C2T_fr_R + del RC_G2A_fr_R del RC_C2T_rf_R - FW_C2T_fr_uniq_lst=[[FW_C2T_fr_U[u][1],u] for u in Unique_FW_fr_C2T] - FW_C2T_rf_uniq_lst=[[FW_C2T_rf_U[u][1],u] for u in Unique_FW_rf_C2T] - RC_C2T_fr_uniq_lst=[[RC_C2T_fr_U[u][1],u] for u in Unique_RC_fr_C2T] - RC_C2T_rf_uniq_lst=[[RC_C2T_rf_U[u][1],u] for u in Unique_RC_rf_C2T] + FW_C2T_fr_uniq_lst = [[FW_C2T_fr_U[u][1],u] for u in Unique_FW_fr_C2T] + FW_C2T_rf_uniq_lst = [[FW_C2T_rf_U[u][1],u] for u in Unique_FW_rf_C2T] + RC_C2T_fr_uniq_lst = [[RC_G2A_fr_U[u][1],u] for u in Unique_RC_fr_G2A] + RC_C2T_rf_uniq_lst = [[RC_C2T_rf_U[u][1],u] for u in Unique_RC_rf_C2T] FW_C2T_fr_uniq_lst.sort() FW_C2T_rf_uniq_lst.sort() RC_C2T_fr_uniq_lst.sort() RC_C2T_rf_uniq_lst.sort() - FW_C2T_fr_uniq_lst=[x[1] for x in FW_C2T_fr_uniq_lst] - FW_C2T_rf_uniq_lst=[x[1] for x in FW_C2T_rf_uniq_lst] - RC_C2T_fr_uniq_lst=[x[1] for x in RC_C2T_fr_uniq_lst] - RC_C2T_rf_uniq_lst=[x[1] for x in RC_C2T_rf_uniq_lst] + FW_C2T_fr_uniq_lst = [x[1] for x in FW_C2T_fr_uniq_lst] + FW_C2T_rf_uniq_lst = [x[1] for x in FW_C2T_rf_uniq_lst] + RC_C2T_fr_uniq_lst = [x[1] for x in RC_C2T_fr_uniq_lst] + RC_C2T_rf_uniq_lst = [x[1] for x in RC_C2T_rf_uniq_lst] #---------------------------------------------------------------- - numbers_premapped_lst[0]+=len(Unique_FW_fr_C2T) - numbers_premapped_lst[1]+=len(Unique_FW_rf_C2T) - numbers_premapped_lst[2]+=len(Unique_RC_fr_C2T) - numbers_premapped_lst[3]+=len(Unique_RC_rf_C2T) + numbers_premapped_lst[0] += len(Unique_FW_fr_C2T) + numbers_premapped_lst[1] += len(Unique_FW_rf_C2T) + numbers_premapped_lst[2] += len(Unique_RC_fr_G2A) + numbers_premapped_lst[3] += len(Unique_RC_rf_C2T) del Unique_FW_fr_C2T del Unique_FW_rf_C2T - del Unique_RC_fr_C2T + del Unique_RC_fr_G2A del Unique_RC_rf_C2T #---------------------------------------------------------------- @@ -466,7 +478,7 @@ nn = 0 
for ali_unique_lst, ali_dic in [(FW_C2T_fr_uniq_lst,FW_C2T_fr_U), (FW_C2T_rf_uniq_lst,FW_C2T_rf_U), - (RC_C2T_fr_uniq_lst,RC_C2T_fr_U), + (RC_C2T_fr_uniq_lst,RC_G2A_fr_U), (RC_C2T_rf_uniq_lst,RC_C2T_rf_U)]: nn += 1 @@ -476,15 +488,15 @@ _, mapped_chr, mapped_location_1, cigar1, mapped_location_2, cigar2 = ali_dic[header] #------------------------------------- - if mapped_chr != mapped_chr0: - my_gseq=deserialize(db_d(mapped_chr)) - chr_length=len(my_gseq) - mapped_chr0=mapped_chr + if mapped_chr!=mapped_chr0: + my_gseq = deserialize(db_d(mapped_chr)) + chr_length = len(my_gseq) + mapped_chr0 = mapped_chr #------------------------------------- - if nn == 1 or nn == 3: + if nn==1 or nn==3 : original_BS_1 = original_bs_reads_1[header] original_BS_2 = reverse_compl_seq(original_bs_reads_2[header]) - else: + else : original_BS_1 = original_bs_reads_2[header] original_BS_2 = reverse_compl_seq(original_bs_reads_1[header]) @@ -493,83 +505,41 @@ all_mapped += 1 - if nn == 1: # FW-RC mapped to + strand: - + if nn==1 : # FW-RC mapped to + strand: FR = "+FR" - -# mapped_location_1 += 1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "+" - -# mapped_location_2 += 1 -# origin_genome_long_2 = my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1] -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "+" - - elif nn==2: # RC-FW mapped to + strand: - -# original_BS_1 = original_bs_reads_2[header] -# original_BS_2 = reverse_compl_seq(original_bs_reads_1[header]) + elif nn==2 : # RC-FW mapped to + strand: FR = "+RF" -# mapped_location_1 += 1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "+" - -# mapped_location_2 += 1 -# origin_genome_long_2 = my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1] -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "+" - - - elif nn==3: # FW-RC mapped to - strand: -# original_BS_1=original_bs_reads_1[header] -# original_BS_2=reverse_compl_seq(original_bs_reads_2[header]) - + elif nn==3 : # FW-RC mapped to - strand: FR = "-FR" mapped_location_1 = chr_length - mapped_location_1 - g_len_1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_long_1 = reverse_compl_seq(origin_genome_long_1) -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "-" - mapped_location_2 = chr_length - mapped_location_2 - g_len_2 -# origin_genome_long_2 = reverse_compl_seq(my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1 ]) -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "-" - - elif nn==4: # RC-FW mapped to - strand: -# original_BS_1=original_bs_reads_2[header] -# original_BS_2=reverse_compl_seq(original_bs_reads_1[header]) - + elif nn==4 : # RC-FW mapped to - strand: FR = "-RF" mapped_location_1 = chr_length - mapped_location_1 - g_len_1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_long_1 = reverse_compl_seq(origin_genome_long_1) -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "-" - mapped_location_2 = chr_length - mapped_location_2 - g_len_2 -# origin_genome_long_2 = reverse_compl_seq(my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1]) -# origin_genome_2 = origin_genome_long_2[2:-2] 
mapped_strand_2 = "-" - - - origin_genome_1, next_1, output_genome_1 = get_genomic_sequence(my_gseq, mapped_location_1, mapped_location_1 + g_len_1, mapped_strand_1) origin_genome_2, next_2, output_genome_2 = get_genomic_sequence(my_gseq, mapped_location_2, mapped_location_2 + g_len_2, mapped_strand_2) r_aln_1, g_aln_1 = cigar_to_alignment(cigar1, original_BS_1, origin_genome_1) r_aln_2, g_aln_2 = cigar_to_alignment(cigar2, original_BS_2, origin_genome_2) - N_mismatch_1 = N_MIS(r_aln_1, g_aln_1) #+ original_BS_length_1 - (r_end_1 - r_start_1) # mismatches in the alignment + soft clipped nucleotides N_mismatch_2 = N_MIS(r_aln_2, g_aln_2) #+ original_BS_length_2 - (r_end_2 - r_start_2) # mismatches in the alignment + soft clipped nucleotides + N_mismatch = max(N_mismatch_1, N_mismatch_2) + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch_1<=mm_no*len(original_BS_1) + and N_mismatch_2<=mm_no*len(original_BS_2)): - if max(N_mismatch_1, N_mismatch_2) <= int(max_mismatch_no) : all_mapped_passed += 1 numbers_mapped_lst[nn-1] += 1 #---- unmapped ------------------------- @@ -577,43 +547,46 @@ del original_bs_reads_2[header] #--------------------------------------- -# output_genome_1 = origin_genome_long_1[0:2] + "_" + origin_genome_1 + "_" + origin_genome_long_1[-2:] -# output_genome_2 = origin_genome_long_2[0:2] + "_" + origin_genome_2 + "_" + origin_genome_long_2[-2:] - - methy_1=methy_seq(r_aln_1, g_aln_1 + next_1) - methy_2=methy_seq(r_aln_2, g_aln_2 + next_2) + methy_1 = methy_seq(r_aln_1, g_aln_1 + next_1) + methy_2 = methy_seq(r_aln_2, g_aln_2 + next_2) mC_lst, uC_lst = mcounts(methy_1, mC_lst, uC_lst) mC_lst, uC_lst = mcounts(methy_2, mC_lst, uC_lst) - - # #---XS FILTER---------------- - #XS = 1 if "ZZZ" in methy.replace('-', '') else 0 XS_1 = 0 nCH_1 = methy_1.count('y') + methy_1.count('z') nmCH_1 = methy_1.count('Y') + methy_1.count('Z') if( (nmCH_1>XS_count) and nmCH_1/float(nCH_1+nmCH_1)>XS_pct ) : XS_1 = 1 - #XS = 1 if "ZZZ" in methy.replace('-', '') else 0 XS_2 = 0 nCH_2 = methy_2.count('y') + methy_2.count('z') nmCH_2 = methy_2.count('Y') + methy_2.count('Z') if( (nmCH_2>XS_count) and nmCH_2/float(nCH_2+nmCH_2)>XS_pct ) : XS_2 = 1 + if mapped_strand_1=='+' : + flag_1 = 67 # 1000011 : 1st read, + strand + flag_2 = 131 #10000011 : 2nd read, + strand + else : + flag_1 = 115 # 1110011 : 1st read, - strand + flag_2 = 179 # 10110011 : 2nd read, - strand - outfile.store(header, N_mismatch_1, FR, mapped_chr, mapped_strand_1, mapped_location_1, cigar1, original_BS_1, methy_1, XS_1, + outfile.store2( read_id_lst_1[header], flag_1, N_mismatch_1, FR, mapped_chr, mapped_strand_1, mapped_location_1, cigar1, original_BS_1, methy_1, XS_1, output_genome = output_genome_1, rnext = mapped_chr, pnext = mapped_location_2) - outfile.store(header, N_mismatch_2, FR, mapped_chr, mapped_strand_2, mapped_location_2, cigar2, original_BS_2, methy_2, XS_2, + all_base_mapped += len(original_BS_1) + + outfile.store2( read_id_lst_2[header], flag_2, N_mismatch_2, FR, mapped_chr, mapped_strand_2, mapped_location_2, cigar2, original_BS_2, methy_2, XS_2, output_genome = output_genome_2, rnext = mapped_chr, pnext = mapped_location_1) + all_base_mapped += len(original_BS_2) + print "--> %s %s (%d/%d) " % (read_file_1, read_file_2, no_my_files, len(my_files)) #---------------------------------------------------------------- # output unmapped pairs #---------------------------------------------------------------- - unmapped_lst=original_bs_reads_1.keys() + unmapped_lst = 
original_bs_reads_1.keys() unmapped_lst.sort() # for u in unmapped_lst: @@ -623,189 +596,188 @@ all_unmapped += len(unmapped_lst) + # Directional library if asktag=="N": #---------------------------------------------------------------- - outfile_1FCT= tmp_d('Trimed_FCT_1.fa'+random_id) - outfile_2FCT= tmp_d('Trimed_FCT_2.fa'+random_id) + outfile_1FCT = tmp_d('Trimed_FCT_1.fa'+random_id) + outfile_2RGA = tmp_d('Trimed_RGA_2.fa'+random_id) try : - read_inf=open(tmp_d(read_file_1),"r") + if read_file_1.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(tmp_d(read_file_1), "rb") + else : + read_inf = open(tmp_d(read_file_1),"r") except IOError : print "[Error] Cannot open file : %s", tmp_d(read_file_1) exit(-1) - oneline=read_inf.readline() - l=oneline.split() - input_format="" - - #if len(l)==5: # old solexa format - # input_format="old Solexa Seq file" + oneline = read_inf.readline() + l = oneline.split() + input_format = "" - if oneline[0]=="@": # Illumina GAII FastQ (Lister et al Nature 2009) - input_format="FastQ" - n_fastq=0 - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="_list of sequences" - elif len(l)==11: # Illumina GAII qseq file - input_format="Illumina GAII qseq file" - elif oneline[0]==">": # fasta - input_format="fasta" - n_fasta=0 + if oneline[0]=="@" : + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">" : + input_format = "seq" + elif len(l)==11 : + input_format = "qseq" + elif oneline[0]==">" : + input_format = "fasta" read_inf.close() print "Detected data format: %s" % input_format #---------------------------------------------------------------- - read_file_list=[read_file_1,read_file_2] - outfile_FCT_list=[outfile_1FCT,outfile_2FCT] - n_list=[0,0] + read_file_list = [read_file_1,read_file_2] + outfile_FCT_list = [outfile_1FCT,outfile_2RGA] + n_list = [0,0] for f in range(2): - read_file=read_file_list[f] - outf_FCT=open(outfile_FCT_list[f],'w') + read_file = read_file_list[f] + outf_FCT = open(outfile_FCT_list[f],'w') original_bs_reads = original_bs_reads_lst[f] - n=n_list[f] + n = n_list[f] - seq_id="" - seq="" - seq_ready="N" + read_id = "" + seq = "" + seq_ready = "N" + line_no = 0 for line in fileinput.input(tmp_d(read_file)): - l=line.split() - if input_format=="old Solexa Seq file": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[4] - seq_ready="Y" - elif input_format=="_list of sequences": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[0] - seq_ready="Y" - elif input_format=="FastQ": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" - elif input_format=="Illumina GAII qseq file": - n+=1 - seq_id=str(n) - seq_id=seq_id.zfill(12) - seq=l[8] - seq_ready="Y" + l = line.split() + line_no += 1 + if input_format=="seq": + n += 1 + read_id = str(n) + read_id = read_id.zfill(12) + if f == 1 : + read_id_lst_1[read_id] = read_id + else : + read_id_lst_2[read_id] = read_id + seq = l[0] + seq_ready = "Y" + elif input_format=="fastq": + l_fastq = math.fmod(line_no, 4) + if l_fastq==1 : + all_raw_reads += 1 + read_id = str(line_no/4+1).zfill(12) + if f==1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq_ready = "N" + elif l_fastq==2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" + elif input_format=="qseq": + all_raw_reads += 1 + read_id = str(line_no) + read_id = read_id.zfill(12) + if f == 
1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq = l[8] + seq_ready = "Y" elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - n+=1 - seq_id=l[0][1:] - seq_id=seq_id.zfill(17) - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + seq_ready = "N" + all_raw_reads += 1 + read_id = str(line_no/2+1).zfill(12) + if f == 1 : + read_id_lst_1[read_id] = l[0][1:] + else : + read_id_lst_2[read_id] = l[0][1:] + seq = "" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" #---------------------------------------------------------------- if seq_ready=="Y": - seq=seq[cut1-1:cut2] #<----------------------selecting 0..52 from 1..72 -e 52 - seq=seq.upper() - seq=seq.replace(".","N") + seq = seq[cut1-1:cut2] #<----------------------selecting 0..52 from 1..72 -e 52 + seq = seq.upper() + seq = seq.replace(".","N") #--striping BS adapter from 3' read -------------------------------------------------------------- - if (adapterA !="") and (adapterB !=""): - signature=adapterA[:6] - if signature in seq: - signature_pos=seq.index(signature) - if seq[signature_pos:] in adapterA: - seq=seq[:signature_pos]#+"".join(["N" for x in range(len(seq)-len(signature_pos))]) - all_trimmed+=1 - else: - signature=adapterB[:6] - if signature in seq: - #print seq_id,seq,signature; - signature_pos=seq.index(signature) - if seq[signature_pos:] in adapterB: - seq=seq[:signature_pos]#+"".join(["N" for x in range(len(seq)-len(signature_pos))]) - all_trimmed+=1 + all_base_before_trim += len(seq) + if f==0 and adapterA!="" : + new_read = RemoveAdapter(seq, adapterA, adapter_mismatch) + if len(new_read) < len(seq) : + all_trimmed += 1 + seq = new_read + if f==1 and adapterB!="" : + new_read = RemoveAdapter(seq, adapterB, adapter_mismatch) + if len(new_read)%s\n%s\n'% (seq_id, seq.replace("C","T"))) + outf_FCT.write('>%s\n%s\n'% (read_id, seq.replace("C","T"))) elif f==1: - outf_FCT.write('>%s\n%s\n'% (seq_id, reverse_compl_seq(seq).replace("C","T"))) + outf_FCT.write('>%s\n%s\n'% (read_id, seq.replace("G","A"))) - - n_list[f]=n + n_list[f] = n outf_FCT.close() - fileinput.close() - #print "All input end 1: %d , end 2: %d "%(n_list[0],n_list[1]); - all_raw_reads+=n + all_raw_reads += n #-------------------------------------------------------------------------------- # Bowtie mapping #-------------------------------------------------------------------------------- - WC2T_fr=tmp_d("W_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) - CC2T_fr=tmp_d("C_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) + WC2T_fr = tmp_d("W_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) + CG2A_fr = tmp_d("C_C2T_fr_m"+max_mismatch_no+".mapping"+random_id) run_in_parallel([ aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'), 'input_file_1' : outfile_1FCT, - 'input_file_2' : outfile_2FCT, + 'input_file_2' : outfile_2RGA, 'output_file' : WC2T_fr}, aligner_command % {'reference_genome' : os.path.join(db_path,'C_C2T'), 'input_file_1' : outfile_1FCT, - 'input_file_2' : outfile_2FCT, - 'output_file' : CC2T_fr} ]) + 'input_file_2' : outfile_2RGA, + 'output_file' : CG2A_fr} ]) - delete_files(outfile_1FCT, outfile_2FCT) + delete_files(outfile_1FCT, outfile_2RGA) #-------------------------------------------------------------------------------- # Post processing #-------------------------------------------------------------------------------- FW_C2T_fr_U, FW_C2T_fr_R = extract_mapping(WC2T_fr) - 
RC_C2T_fr_U, RC_C2T_fr_R = extract_mapping(CC2T_fr) + RC_G2A_fr_U, RC_G2A_fr_R = extract_mapping(CG2A_fr) #---------------------------------------------------------------- # get uniq-hit reads #---------------------------------------------------------------- - Union_set = set(FW_C2T_fr_U.iterkeys()) | set(RC_C2T_fr_U.iterkeys()) + Union_set = set(FW_C2T_fr_U.iterkeys()) | set(RC_G2A_fr_U.iterkeys()) Unique_FW_fr_C2T = set() # + - Unique_RC_fr_C2T = set() # - + Unique_RC_fr_G2A = set() # - Multiple_hits=set() for x in Union_set: _list = [] - for d in [FW_C2T_fr_U, RC_C2T_fr_U]: + for d in [FW_C2T_fr_U, RC_G2A_fr_U]: mis_lst = d.get(x,[99]) mis = int(mis_lst[0]) _list.append(mis) - for d in [FW_C2T_fr_R, RC_C2T_fr_R]: + for d in [FW_C2T_fr_R, RC_G2A_fr_R]: mis = d.get(x,99) _list.append(mis) mini = min(_list) @@ -814,7 +786,7 @@ if mini_index == 0: Unique_FW_fr_C2T.add(x) elif mini_index == 1: - Unique_RC_fr_C2T.add(x) + Unique_RC_fr_G2A.add(x) else : Multiple_hits.add(x) else : @@ -822,35 +794,34 @@ # write reads rejected by Multiple Hits to file if show_multiple_hit : - outf_MH=open("Multiple_hit.fa",'w') + outf_MH = open("Multiple_hit.fa",'w') for i in Multiple_hits : outf_MH.write(">%s\n" % i) outf_MH.write("%s\n" % original_bs_reads[i]) outf_MH.close() - FW_C2T_fr_uniq_lst=[[FW_C2T_fr_U[u][1],u] for u in Unique_FW_fr_C2T] - RC_C2T_fr_uniq_lst=[[RC_C2T_fr_U[u][1],u] for u in Unique_RC_fr_C2T] + FW_C2T_fr_uniq_lst = [[FW_C2T_fr_U[u][1],u] for u in Unique_FW_fr_C2T] + RC_C2T_fr_uniq_lst = [[RC_G2A_fr_U[u][1],u] for u in Unique_RC_fr_G2A] FW_C2T_fr_uniq_lst.sort() RC_C2T_fr_uniq_lst.sort() - FW_C2T_fr_uniq_lst=[x[1] for x in FW_C2T_fr_uniq_lst] - RC_C2T_fr_uniq_lst=[x[1] for x in RC_C2T_fr_uniq_lst] + FW_C2T_fr_uniq_lst = [x[1] for x in FW_C2T_fr_uniq_lst] + RC_C2T_fr_uniq_lst = [x[1] for x in RC_C2T_fr_uniq_lst] #---------------------------------------------------------------- - numbers_premapped_lst[0]+=len(Unique_FW_fr_C2T) - numbers_premapped_lst[1]+=len(Unique_RC_fr_C2T) + numbers_premapped_lst[0] += len(Unique_FW_fr_C2T) + numbers_premapped_lst[1] += len(Unique_RC_fr_G2A) #---------------------------------------------------------------- nn = 0 - for ali_unique_lst, ali_dic in [(FW_C2T_fr_uniq_lst,FW_C2T_fr_U), (RC_C2T_fr_uniq_lst,RC_C2T_fr_U)]: + for ali_unique_lst, ali_dic in [(FW_C2T_fr_uniq_lst,FW_C2T_fr_U), (RC_C2T_fr_uniq_lst,RC_G2A_fr_U)]: nn += 1 mapped_chr0 = "" for header in ali_unique_lst: _, mapped_chr, mapped_location_1, cigar1, mapped_location_2, cigar2 = ali_dic[header] - #------------------------------------- if mapped_chr != mapped_chr0: my_gseq = deserialize(db_d(mapped_chr)) @@ -860,6 +831,7 @@ original_BS_1 = original_bs_reads_1[header] original_BS_2 = reverse_compl_seq(original_bs_reads_2[header]) + #original_BS_2 = original_bs_reads_2[header] r_start_1, r_end_1, g_len_1 = get_read_start_end_and_genome_length(cigar1) r_start_2, r_end_2, g_len_2 = get_read_start_end_and_genome_length(cigar2) @@ -868,43 +840,30 @@ if nn == 1: # FW-RC mapped to + strand: FR = "+FR" -# mapped_location_1 += 1 -# origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "+" - -# mapped_location_2 += 1 -# origin_genome_long_2 = my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1] -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "+" - elif nn == 2: # FW-RC mapped to - strand: - FR="-FR" mapped_location_1 = chr_length - mapped_location_1 - g_len_1 -# 
origin_genome_long_1 = my_gseq[mapped_location_1 - 2 - 1 : mapped_location_1 + g_len_1 + 2 - 1] -# origin_genome_long_1 = reverse_compl_seq(origin_genome_long_1) -# origin_genome_1 = origin_genome_long_1[2:-2] mapped_strand_1 = "-" - mapped_location_2 = chr_length - mapped_location_2 - g_len_2 -# origin_genome_long_2 = reverse_compl_seq(my_gseq[mapped_location_2 - 2 - 1 : mapped_location_2 + g_len_2 + 2 - 1]) -# origin_genome_2 = origin_genome_long_2[2:-2] mapped_strand_2 = "-" - - origin_genome_1, next_1, output_genome_1 = get_genomic_sequence(my_gseq, mapped_location_1, mapped_location_1 + g_len_1, mapped_strand_1) origin_genome_2, next_2, output_genome_2 = get_genomic_sequence(my_gseq, mapped_location_2, mapped_location_2 + g_len_2, mapped_strand_2) - r_aln_1, g_aln_1 = cigar_to_alignment(cigar1, original_BS_1, origin_genome_1) r_aln_2, g_aln_2 = cigar_to_alignment(cigar2, original_BS_2, origin_genome_2) N_mismatch_1 = N_MIS(r_aln_1, g_aln_1) #+ original_BS_length_1 - (r_end_1 - r_start_1) # mismatches in the alignment + soft clipped nucleotides N_mismatch_2 = N_MIS(r_aln_2, g_aln_2) #+ original_BS_length_2 - (r_end_2 - r_start_2) # mismatches in the alignment + soft clipped nucleotides - if max(N_mismatch_1, N_mismatch_2) <= int(max_mismatch_no): +# if max(N_mismatch_1, N_mismatch_2) <= int(max_mismatch_no): +# if N_mismatch <= int(max_mismatch_no) : + N_mismatch = max(N_mismatch_1, N_mismatch_2) + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch_1<=mm_no*len(original_BS_1) + and N_mismatch_2<=mm_no*len(original_BS_2)): numbers_mapped_lst[nn-1] += 1 all_mapped_passed += 1 @@ -914,8 +873,6 @@ del original_bs_reads_2[header] #--------------------------------------- -# output_genome_1 = origin_genome_long_1[0:2] + "_" + origin_genome_1 + "_" + origin_genome_long_1[-2:] -# output_genome_2 = origin_genome_long_2[0:2] + "_" + origin_genome_2 + "_" + origin_genome_long_2[-2:] methy_1=methy_seq(r_aln_1, g_aln_1 + next_1) methy_2=methy_seq(r_aln_2, g_aln_2 + next_2) mC_lst,uC_lst = mcounts(methy_1, mC_lst, uC_lst) @@ -936,11 +893,20 @@ if( (nmCH_2>XS_count) and nmCH_2/float(nCH_2+nmCH_2)>XS_pct ) : XS_2 = 1 + if mapped_strand_1 == '+' : + flag_1 = 67 # 1000011 : 1st read, + strand + flag_2 = 131 # 10000011 : 2nd read, + strand + else : + flag_1 = 115 # 1110011 : 1st read, - strand + flag_2 = 179 # 10110011 : 2nd read, - strand - outfile.store(header, N_mismatch_1, FR, mapped_chr, mapped_strand_1, mapped_location_1, cigar1, original_BS_1, methy_1, XS_1, + outfile.store2( read_id_lst_1[header], flag_1, N_mismatch_1, FR, mapped_chr, mapped_strand_1, mapped_location_1, cigar1, original_BS_1, methy_1, XS_1, output_genome = output_genome_1, rnext = mapped_chr, pnext = mapped_location_2) - outfile.store(header, N_mismatch_2, FR, mapped_chr, mapped_strand_2, mapped_location_2, cigar2, original_BS_2, methy_2, XS_2, + all_base_mapped += len(original_BS_1) + + outfile.store2( read_id_lst_2[header], flag_2, N_mismatch_2, FR, mapped_chr, mapped_strand_2, mapped_location_2, cigar2, original_BS_2, methy_2, XS_2, output_genome = output_genome_2, rnext = mapped_chr, pnext = mapped_location_1) + all_base_mapped += len(original_BS_2) print "--> %s %s (%d/%d) " % (read_file_1, read_file_2, no_my_files, len(my_files)) #---------------------------------------------------------------- @@ -956,45 +922,40 @@ all_unmapped+=len(unmapped_lst) - #================================================================================================== -# outf.close() -# -# 
outf_u1.close() -# outf_u2.close() delete_files(tmp_path) logm("-------------------------------- " ) - logm("O Number of raw BS-read pairs: %d ( %d bp)"%(all_raw_reads,cut2-cut1+1) ) - logm("O Number of ends trimmed for adapter: %d"% all_trimmed+"\n") + logm("Number of raw BS-read pairs: %d " %(all_raw_reads/2) ) + if adapterA != "" or adapterB != "" : + logm("Number of reads having adapter removed: %d \n"% all_trimmed) + logm("Number of bases after trimming the adapters: %d (%1.3f)" % (all_base_after_trim, float(all_base_after_trim)/all_base_before_trim) ) if all_raw_reads >0: - logm("O Number of unique-hits read pairs for post-filtering: %d" % all_mapped + "\n") + logm("Number of reads rejected because of multiple hits: %d\n" % len(Multiple_hits) ) + logm("Number of unique-hits reads (before post-filtering): %d" % all_mapped + "\n") if asktag=="Y": - logm("O -- %7d FW-RC pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) - logm("O -- %7d RC-FW pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[1]) ) - logm("O -- %7d FW-RC pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[2]) ) - logm("O -- %7d RC-FW pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[3]) ) + logm("-- %7d FW-RC pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) + logm("-- %7d RC-FW pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[1]) ) + logm("-- %7d FW-RC pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[2]) ) + logm("-- %7d RC-FW pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[3]) ) elif asktag=="N": - logm("O -- %7d FW-RC pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) - logm("O -- %7d FW-RC pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[1]) ) - + logm("-- %7d FW-RC pairs mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) + logm("-- %7d FW-RC pairs mapped to Crick strand (before post-filtering)"%(numbers_premapped_lst[1]) ) + logm("--- %d uniquely aligned pairs, where each end has mismatches <= %s"%(all_mapped_passed, max_mismatch_no) ) + if asktag=="Y": + logm("----- %7d FW-RC pairs mapped to Watson strand"%(numbers_mapped_lst[0]) ) + logm("----- %7d RC-FW pairs mapped to Watson strand"%(numbers_mapped_lst[1]) ) + logm("----- %7d FW-RC pairs mapped to Crick strand"%(numbers_mapped_lst[2]) ) + logm("----- %7d RC-FW pairs mapped to Crick strand"%(numbers_mapped_lst[3]) ) + elif asktag=="N": + logm("----- %7d FW-RC pairs mapped to Watson strand"%(numbers_mapped_lst[0]) ) + logm("----- %7d FW-RC pairs mapped to Crick strand"%(numbers_mapped_lst[1]) ) - logm("O --- Number of reads rejected because of multiple hits: %d\n" % len(Multiple_hits) ) - logm("O --- %d uniquely aligned pairs, where each end has mismatches <= %s"%(all_mapped_passed, max_mismatch_no) ) - if asktag=="Y": - logm("O ----- %7d FW-RC pairs mapped to Watson strand"%(numbers_mapped_lst[0]) ) - logm("O ----- %7d RC-FW pairs mapped to Watson strand"%(numbers_mapped_lst[1]) ) - logm("O ----- %7d FW-RC pairs mapped to Crick strand"%(numbers_mapped_lst[2]) ) - logm("O ----- %7d RC-FW pairs mapped to Crick strand"%(numbers_mapped_lst[3]) ) - elif asktag=="N": - logm("O ----- %7d FW-RC pairs mapped to Watson strand"%(numbers_mapped_lst[0]) ) - logm("O ----- %7d FW-RC pairs mapped to Crick strand"%(numbers_mapped_lst[1]) ) - - logm("O Mappability= 
%1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads) ) - logm("O Unmapped read pairs: %d"% all_unmapped+"\n") - + logm("Mappability= %1.4f%%"%(100*float(all_mapped_passed)*2/all_raw_reads) ) + logm("Total bases of uniquely mapped reads %7d"% all_base_mapped ) + logm("Unmapped read pairs: %d"% all_unmapped+"\n") n_CG=mC_lst[0]+uC_lst[0] n_CHG=mC_lst[1]+uC_lst[1] diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/bs_rrbs.py --- a/BSseeker2/bs_align/bs_rrbs.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/bs_rrbs.py Tue Nov 05 01:55:39 2013 -0500 @@ -42,24 +42,7 @@ # For double enzyme: cut_format="C-CGG,A-CTG"; ApekI:"G^CWGC" #cut_context = re.sub("-", "", cut_format) # Ex. cut_format="C-CGG,AT-CG,G-CWGC" - """ - :param main_read_file: - :param asktag: - :param adapter_file: - :param cut_s: - :param cut_e: - :param no_small_lines: - :param max_mismatch_no: - :param aligner_command: - :param db_path: - :param tmp_path: - :param outfile: - :param XS_pct: - :param XS_count: - :param adapter_mismatch: - :param cut_format: - """ cut_format_lst = EnumerateIUPAC(cut_format.upper().split(",")) # ['G-CAGC', 'AT-CG', 'C-CGG', 'G-CTGC'] cut_context = [i.replace("-","") for i in cut_format_lst] # ['GCAGC', 'ATCG', 'CCGG', 'GCTGC'] cut5_context = [re.match( r'(.*)\-(.*)', i).group(1) for i in cut_format_lst] # ['G', 'AT', 'C', 'G'] @@ -106,18 +89,31 @@ print "[Error] Cannot find adapter file : %s !" % adapter_file exit(-1) - logm("I Read filename: %s" % main_read_file) - logm("I The last cycle (for mapping): %d" % cut_e ) - logm("I Bowtie path: %s" % aligner_command ) - logm("I Reference genome library path: %s" % db_path ) - logm("I Number of mismatches allowed: %s" % max_mismatch_no) - logm("I Adapter seq: %s" % whole_adapter_seq) - logm("----------------------------------------------") + logm("Read filename: %s" % main_read_file) + logm("The first base (for mapping): %d"% cut_s ) + logm("The last base (for mapping): %d"% cut_e ) + + logm("Path for short reads aligner: %s"% aligner_command + '\n') + logm("Reference genome library path: %s"% db_path ) + + if asktag == "Y" : + logm("Un-directional library" ) + else : + logm("Directional library") + + logm("Number of mismatches allowed: %s"% max_mismatch_no ) + + if adapter_file !="": + logm("Adapter seq: %s" % whole_adapter_seq) + logm("-------------------------------- " ) #---------------------------------------------------------------- all_raw_reads=0 all_tagged=0 all_tagged_trimmed=0 + all_base_before_trim=0 + all_base_after_trim=0 + all_base_mapped=0 all_mapped=0 all_mapped_passed=0 n_cut_tag_lst={} @@ -135,6 +131,9 @@ num_mapped_FW_G2A = 0 num_mapped_RC_G2A = 0 + # Count of nucleotides, which should be cut before the adapters + Extra_base_cut_5end_adapter = max([ abs(len(i)-len(j)) for i,j in zip(cut5_context, cut3_context)]) + #=============================================== # directional sequencing #=============================================== @@ -156,72 +155,71 @@ #--- Checking input format ------------------------------------------ try : - read_inf=open(read_file,"r") + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") except IOError: print "[Error] Cannot open input file : %s" % read_file exit(-1) - oneline=read_inf.readline() - l=oneline.split() - n_fastq=0 - n_fasta=0 - input_format="" - if oneline[0]=="@": # FastQ - input_format="fastq" - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="seq" - elif 
len(l)==11: # Illumina qseq - input_format="qseq" - elif oneline[0]==">": # fasta - input_format="fasta" + oneline = read_inf.readline() + l = oneline.split() + input_format = "" + if oneline[0]=="@": + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">" : + input_format = "seq" + elif len(l)==11: + input_format = "qseq" + elif oneline[0]==">" : + input_format = "fasta" read_inf.close() + #---------------------------------------------------------------- - seq_id="" - seq="" - seq_ready=0 - for line in fileinput.input(read_file): - l=line.split() - + read_id = "" + seq = "" + seq_ready = 0 + line_no = 0 + for line in fileinput.input(read_file, openhook=fileinput.hook_compressed): + l = line.split() + line_no += 1 if input_format=="seq": - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq=l[0] - seq_ready="Y" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq == 1 : + all_raw_reads += 1 + read_id = l[0][1:] + seq_ready = "N" + elif l_fastq == 2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq=l[8] - seq_ready="Y" - elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - all_raw_reads+=1 - seq_id=l[0][1:] - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[8] + seq_ready = "Y" + elif input_format=="fasta" : + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + all_raw_reads += 1 + read_id = l[0][1:] + seq = "" + seq_ready = "N" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" + #--------------------------------------------------------------- if seq_ready=="Y": # Normalize the characters @@ -236,19 +234,22 @@ seq = seq[(cut_s-1):cut_e] # cut_s start from 1 cycle by default #-- Trimming adapter sequence --- + + all_base_before_trim += len(seq) if adapter_seq != "" : - new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch) + new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch, Extra_base_cut_5end_adapter) if len(new_read) < len(seq) : all_tagged_trimmed += 1 seq = new_read + all_base_after_trim += len(seq) if len(seq) <= 4 : seq = "N" * (cut_e - cut_s) # all reads will be considered, regardless of tags #--------- trimmed_raw_BS_read and qscore ------------------ - original_bs_reads[seq_id] = seq + original_bs_reads[read_id] = seq #--------- FW_C2T ------------------ - outf2.write('>%s\n%s\n'%(seq_id, seq.replace('C', 'T'))) + outf2.write('>%s\n%s\n'%(read_id, seq.replace('C', 'T'))) fileinput.close() outf2.close() @@ -384,7 +385,9 @@ N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -402,6 +405,7 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + 
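The hunks above (and the matching ones later in this changeset) replace the integer check N_mismatch <= int(max_mismatch_no) with a dual-mode threshold: a -m value of 1 or more is an absolute mismatch cap, while a value below 1 is read as a fraction of the aligned read length. A standalone sketch of that rule; the function name and the example numbers are illustrative only:

def passes_mismatch_filter(n_mismatch, max_mismatch_no, aligned_len):
    # -m >= 1 : absolute number of mismatches allowed
    # -m <  1 : fraction of the aligned read length allowed to mismatch
    mm_no = float(max_mismatch_no)
    if mm_no >= 1:
        return n_mismatch <= mm_no
    return n_mismatch <= mm_no * aligned_len

print(passes_mismatch_filter(3, 4, 75))      # True  (3 <= 4)
print(passes_mismatch_filter(5, 0.05, 75))   # False (5 > 0.05 * 75 = 3.75)
print(passes_mismatch_filter(3, 0.05, 75))   # True  (3 <= 3.75)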
all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" @@ -453,7 +457,9 @@ N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -471,6 +477,7 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" @@ -507,76 +514,74 @@ #--- Checking input format ------------------------------------------ try : - read_inf=open(read_file,"r") + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") except IOError: print "[Error] Cannot open input file : %s" % read_file exit(-1) - oneline=read_inf.readline() - l=oneline.split() - n_fastq=0 - n_fasta=0 - input_format="" - if oneline[0]=="@": # FastQ - input_format="fastq" - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="seq" - elif len(l)==11: # Illumina qseq - input_format="qseq" - elif oneline[0]==">": # fasta - input_format="fasta" + oneline = read_inf.readline() + l = oneline.split() + input_format = "" + if oneline[0]=="@": + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">" : + input_format = "seq" + elif len(l)==11: + input_format = "qseq" + elif oneline[0]==">" : + input_format = "fasta" read_inf.close() #---------------------------------------------------------------- - seq_id = "" + read_id = "" seq = "" - seq_ready=0 - for line in fileinput.input(read_file): - l=line.split() - - if input_format == "seq": - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq=l[0] - seq_ready="Y" + seq_ready = 0 + line_no = 0 + for line in fileinput.input(read_file, openhook=fileinput.hook_compressed): + l = line.split() + line_no += 1 + if input_format=="seq": + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq == 1 : + all_raw_reads += 1 + read_id = l[0][1:] + seq_ready = "N" + elif l_fastq == 2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - all_raw_reads+=1 - seq_id=str(all_raw_reads) - seq_id=seq_id.zfill(12) - seq=l[8] - seq_ready="Y" - elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - all_raw_reads+=1 - seq_id=l[0][1:] - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" - #--------------------------------------------------------------- + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[8] + seq_ready = "Y" + elif input_format=="fasta" : + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + all_raw_reads += 1 + read_id = l[0][1:] + seq = "" + seq_ready = "N" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" + + #--------------------------------------------------------------- if seq_ready=="Y": # Normalize the characters - 
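Both read-scanning loops in bs_rrbs.py now open gzipped input transparently and sniff the input format from the first line of the file. A compact sketch of that logic, covering the same four formats as the patch; it is written Python-2 style to match the code above (a Python 3 port would need the gzip stream opened in text mode or decoded):

import gzip

def open_reads(path):
    # ".gz" input is opened through gzip, anything else as plain text
    return gzip.open(path, "rb") if path.endswith(".gz") else open(path, "r")

def sniff_format(first_line):
    fields = first_line.split()
    if first_line.startswith("@"):
        return "fastq"
    if first_line.startswith(">"):
        return "fasta"
    if len(fields) == 11:
        return "qseq"    # Illumina qseq: 11 whitespace-separated fields
    if len(fields) == 1:
        return "seq"     # plain one-sequence-per-line input
    return ""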
seq=seq.upper().replace(".","N") + seq = seq.upper().replace(".","N") read_tag = [ m for m,n in [ (i, len(i)) for i in uniq(cut3_tag_lst)] if seq[0:n] == m ] if len(read_tag) > 0 : @@ -587,21 +592,23 @@ seq = seq[(cut_s-1):cut_e] # cut_s start from 1 cycle by default #-- Trimming adapter sequence --- + all_base_before_trim += len(seq) if adapter_seq != "" : - new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch) + new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch, Extra_base_cut_5end_adapter) if len(new_read) < len(seq) : all_tagged_trimmed += 1 seq = new_read + all_base_after_trim += len(seq) if len(seq) <= 4 : seq = "N" * (cut_e - cut_s) # all reads will be considered, regardless of tags #--------- trimmed_raw_BS_read and qscore ------------------ - original_bs_reads[seq_id] = seq + original_bs_reads[read_id] = seq #--------- FW_C2T ------------------ - outf2.write('>%s\n%s\n'%(seq_id, seq.replace('C', 'T'))) + outf2.write('>%s\n%s\n'%(read_id, seq.replace('C', 'T'))) #--------- RC_G2A ------------------ - outf3.write('>%s\n%s\n' % (seq_id, seq.replace("G","A"))) + outf3.write('>%s\n%s\n' % (read_id, seq.replace("G","A"))) fileinput.close() outf2.close() @@ -612,10 +619,10 @@ # mapping #-------------------------------------------------------------------------------- - WC2T=tmp_d("W_C2T_m"+max_mismatch_no+".mapping"+random_id) - CC2T=tmp_d("C_C2T_m"+max_mismatch_no+".mapping"+random_id) - WG2A=tmp_d("W_G2A_m"+max_mismatch_no+".mapping"+random_id) - CG2A=tmp_d("C_G2A_m"+max_mismatch_no+".mapping"+random_id) + WC2T = tmp_d("W_C2T_m"+max_mismatch_no+".mapping"+random_id) + CC2T = tmp_d("C_C2T_m"+max_mismatch_no+".mapping"+random_id) + WG2A = tmp_d("W_G2A_m"+max_mismatch_no+".mapping"+random_id) + CG2A = tmp_d("C_G2A_m"+max_mismatch_no+".mapping"+random_id) run_in_parallel([ aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'), 'input_file' : outfile2, @@ -638,37 +645,37 @@ # Post processing #-------------------------------------------------------------------------------- - FW_C2T_U,FW_C2T_R=extract_mapping(WC2T) - RC_G2A_U,RC_G2A_R=extract_mapping(CG2A) + FW_C2T_U,FW_C2T_R = extract_mapping(WC2T) + RC_G2A_U,RC_G2A_R = extract_mapping(CG2A) - FW_G2A_U,FW_G2A_R=extract_mapping(WG2A) - RC_C2T_U,RC_C2T_R=extract_mapping(CC2T) + FW_G2A_U,FW_G2A_R = extract_mapping(WG2A) + RC_C2T_U,RC_C2T_R = extract_mapping(CC2T) logm("Extracting alignments is done") #---------------------------------------------------------------- # get unique-hit reads #---------------------------------------------------------------- - Union_set=set(FW_C2T_U.iterkeys()) | set(RC_G2A_U.iterkeys()) | set(FW_G2A_U.iterkeys()) | set(RC_C2T_U.iterkeys()) + Union_set = set(FW_C2T_U.iterkeys()) | set(RC_G2A_U.iterkeys()) | set(FW_G2A_U.iterkeys()) | set(RC_C2T_U.iterkeys()) - Unique_FW_C2T=set() # + - Unique_RC_G2A=set() # + - Unique_FW_G2A=set() # - - Unique_RC_C2T=set() # - - Multiple_hits=set() + Unique_FW_C2T = set() # + + Unique_RC_G2A = set() # + + Unique_FW_G2A = set() # - + Unique_RC_C2T = set() # - + Multiple_hits = set() - for x in Union_set: - _list=[] + for x in Union_set : + _list = [] for dx in [FW_C2T_U, RC_G2A_U, FW_G2A_U, RC_C2T_U]: - mis_lst=dx.get(x,[99]) - mis=int(mis_lst[0]) + mis_lst = dx.get(x,[99]) + mis = int(mis_lst[0]) _list.append(mis) for dx in [FW_C2T_R, RC_G2A_R, FW_G2A_R, RC_C2T_R]: - mis=dx.get(x,99) + mis = dx.get(x,99) _list.append(mis) - mini=min(_list) + mini = min(_list) if _list.count(mini) == 1: - mini_index=_list.index(mini) + mini_index = _list.index(mini) if 
mini_index == 0: Unique_FW_C2T.add(x) elif mini_index == 1: @@ -683,7 +690,7 @@ Multiple_hits.add(x) # write reads rejected by Multiple Hits to file if show_multiple_hit : - outf_MH=open("Multiple_hit.fa",'w') + outf_MH = open("Multiple_hit.fa",'w') for i in Multiple_hits : outf_MH.write(">%s\n" % i) outf_MH.write("%s\n" % original_bs_reads[i]) @@ -695,18 +702,18 @@ del RC_C2T_R del RC_G2A_R - FW_C2T_uniq_lst=[[FW_C2T_U[u][1],u] for u in Unique_FW_C2T] - FW_G2A_uniq_lst=[[FW_G2A_U[u][1],u] for u in Unique_FW_G2A] - RC_C2T_uniq_lst=[[RC_C2T_U[u][1],u] for u in Unique_RC_C2T] - RC_G2A_uniq_lst=[[RC_G2A_U[u][1],u] for u in Unique_RC_G2A] + FW_C2T_uniq_lst = [[FW_C2T_U[u][1],u] for u in Unique_FW_C2T] + FW_G2A_uniq_lst = [[FW_G2A_U[u][1],u] for u in Unique_FW_G2A] + RC_C2T_uniq_lst = [[RC_C2T_U[u][1],u] for u in Unique_RC_C2T] + RC_G2A_uniq_lst = [[RC_G2A_U[u][1],u] for u in Unique_RC_G2A] FW_C2T_uniq_lst.sort() RC_C2T_uniq_lst.sort() FW_G2A_uniq_lst.sort() RC_G2A_uniq_lst.sort() - FW_C2T_uniq_lst=[x[1] for x in FW_C2T_uniq_lst] - RC_C2T_uniq_lst=[x[1] for x in RC_C2T_uniq_lst] - FW_G2A_uniq_lst=[x[1] for x in FW_G2A_uniq_lst] - RC_G2A_uniq_lst=[x[1] for x in RC_G2A_uniq_lst] + FW_C2T_uniq_lst = [x[1] for x in FW_C2T_uniq_lst] + RC_C2T_uniq_lst = [x[1] for x in RC_C2T_uniq_lst] + FW_G2A_uniq_lst = [x[1] for x in FW_G2A_uniq_lst] + RC_G2A_uniq_lst = [x[1] for x in RC_G2A_uniq_lst] del Unique_FW_C2T del Unique_FW_G2A @@ -716,7 +723,7 @@ #---------------------------------------------------------------- # Post-filtering reads - # ---- FW_C2T ---- undirectional + # ---- FW_C2T ---- un-directional FW_regions = dict() gseq = dict() chr_length = dict() @@ -758,7 +765,9 @@ try_count += 1 N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -776,11 +785,12 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" - # ---- RC_C2T ---- undirectional + # ---- RC_C2T ---- un-directional RC_regions = dict() for header in RC_C2T_uniq_lst : _, mapped_chr, mapped_location, cigar = RC_C2T_U[header] @@ -821,7 +831,9 @@ try_count += 1 N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -839,12 +851,13 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" - # ---- FW_G2A ---- undirectional + # ---- FW_G2A ---- un-directional FW_regions = dict() gseq = dict() chr_length = dict() @@ -891,9 +904,10 @@ # print "[For debug]: FW_G2A read still can not find fragment serial" # Tip: sometimes "my_region_serial" is still 0 ... 
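The Union_set loop above decides, for every read, which of the four conversions (FW_C2T, RC_G2A, FW_G2A, RC_C2T) produced the single best hit: only a unique lowest mismatch count across the unique-hit and repeat-hit tables keeps the read, everything else goes to Multiple_hits (and optionally to Multiple_hit.fa). A condensed sketch of that decision; the dictionary layout follows the patch but the helper itself is illustrative:

def classify_read(read_id, unique_dicts, repeat_dicts, miss=99):
    # unique_dicts: one dict per conversion, value starts with the mismatch count
    # repeat_dicts: one dict per conversion, value is the mismatch count of a repeat hit
    scores = [int(d.get(read_id, [miss])[0]) for d in unique_dicts]
    scores += [d.get(read_id, miss) for d in repeat_dicts]
    best = min(scores)
    if scores.count(best) == 1 and scores.index(best) < len(unique_dicts):
        return scores.index(best)   # index of the winning unique conversion
    return None                     # tie, or best hit is a repeat -> multiple-hit read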
- N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + max_mismatch_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -911,11 +925,12 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" - # ---- RC_G2A ---- undirectional + # ---- RC_G2A ---- un-directional RC_regions = dict() for header in RC_G2A_uniq_lst : _, mapped_chr, mapped_location, cigar = RC_G2A_U[header] @@ -961,9 +976,10 @@ # print "[For debug]: chr=", mapped_chr # print "[For debug]: RC_C2A read still cannot find fragment serial" - N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no) : +# if N_mismatch <= int(max_mismatch_no) : + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next2bp) mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst) @@ -981,37 +997,38 @@ my_region_serial = my_region_serial, my_region_start = my_region_start, my_region_end = my_region_end) + all_base_mapped += len(original_BS) else : print "[For debug]: reads not in same lengths" - - # Finished both FW and RC logm("Done: %s (%d) \n" % (read_file, no_my_files)) print "--> %s (%d) "%(read_file, no_my_files) del original_bs_reads delete_files(WC2T, CC2T, WG2A, CG2A) - - - # End of un-directional library delete_files(tmp_path) - logm("O Number of raw reads: %d "% all_raw_reads) - if all_raw_reads >0: - logm("O Number of CGG/TGG tagged reads: %d (%1.3f)"%(all_tagged,float(all_tagged)/all_raw_reads)) + logm("Number of raw reads: %d "% all_raw_reads) + if all_raw_reads>0: + logm("Number of raw reads with CGG/TGG at 5' end: %d (%1.3f)"%(all_tagged,float(all_tagged)/all_raw_reads)) for kk in range(len(n_cut_tag_lst)): - logm("O Number of raw reads with %s tag: %d (%1.3f)"%(cut3_tag_lst[kk],n_cut_tag_lst[cut3_tag_lst[kk]],float(n_cut_tag_lst[cut3_tag_lst[kk]])/all_raw_reads)) - logm("O Number of CGG/TGG reads having adapter removed: %d "%all_tagged_trimmed) - logm("O Number of reads rejected because of multiple hits: %d\n" % len(Multiple_hits) ) - logm("O Number of unique-hits reads for post-filtering: %d"%all_mapped) + logm("Number of raw reads with tag %s: %d (%1.3f)"%(cut3_tag_lst[kk],n_cut_tag_lst[cut3_tag_lst[kk]],float(n_cut_tag_lst[cut3_tag_lst[kk]])/all_raw_reads)) + if adapter_seq!="" : + logm("Number of reads having adapter removed: %d "%all_tagged_trimmed) + logm("Number of bases in total: %d "%all_base_before_trim) + if adapter_seq!="" : + logm("Number of bases after trimming the adapters: %d (%1.3f)"%(all_base_after_trim, float(all_base_after_trim)/all_base_before_trim ) ) + logm("Number of reads are rejected because of multiple hits: %d\n" % len(Multiple_hits) ) + logm("Number of unique-hits reads (before post-filtering): %d"%all_mapped) - logm("O ------ %d uniquely aligned reads, passed fragment check, with mismatches <= %s"%(all_mapped_passed, max_mismatch_no)) - logm("O Mappability= %1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads)) + logm("------ %d uniquely aligned reads, passed fragment check, with mismatches <= %s"%(all_mapped_passed, max_mismatch_no)) + logm("Mappability= 
%1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads)) + logm("Total bases of uniquely mapped reads %7d"% all_base_mapped ) - if asktag=="Y": # undiretional + if asktag=="Y": # un-diretional logm(" ---- %7d FW reads mapped to Watson strand"%(num_mapped_FW_C2T) ) logm(" ---- %7d RC reads mapped to Watson strand"%(num_mapped_FW_G2A) ) logm(" ---- %7d FW reads mapped to Crick strand"%(num_mapped_RC_C2T) ) @@ -1021,16 +1038,17 @@ elif asktag=="N": # directional logm(" ---- %7d FW reads mapped to Watson strand"%(num_mapped_FW_C2T) ) logm(" ---- %7d FW reads mapped to Crick strand"%(num_mapped_RC_C2T) ) + logm("Total bases of uniquely mapped reads %7d"% all_base_mapped ) - n_CG=mC_lst[0]+uC_lst[0] - n_CHG=mC_lst[1]+uC_lst[1] - n_CHH=mC_lst[2]+uC_lst[2] + n_CG = mC_lst[0] + uC_lst[0] + n_CHG = mC_lst[1] + uC_lst[1] + n_CHH = mC_lst[2] + uC_lst[2] logm("----------------------------------------------") - logm("M Methylated C in mapped reads ") - logm("M mCG %1.3f%%"%((100*float(mC_lst[0])/n_CG) if n_CG != 0 else 0)) - logm("M mCHG %1.3f%%"%((100*float(mC_lst[1])/n_CHG) if n_CHG != 0 else 0)) - logm("M mCHH %1.3f%%"%((100*float(mC_lst[2])/n_CHH) if n_CHH != 0 else 0)) + logm("Methylated C in mapped reads ") + logm(" mCG %1.3f%%"%( (100 * float(mC_lst[0]) / n_CG) if n_CG!=0 else 0)) + logm(" mCHG %1.3f%%"%( (100 * float(mC_lst[1]) / n_CHG) if n_CHG!=0 else 0)) + logm(" mCHH %1.3f%%"%( (100 * float(mC_lst[2]) / n_CHH) if n_CHH!=0 else 0)) logm("----------------------------------------------") logm("------------------- END ----------------------") diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/bs_single_end.py --- a/BSseeker2/bs_align/bs_single_end.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/bs_single_end.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,6 +1,7 @@ import fileinput, os, time, random, math from bs_utils.utils import * from bs_align_utils import * +import gzip #---------------------------------------------------------------- # Read from the mapped results, return lists of unique / multiple-hit reads @@ -77,20 +78,23 @@ #---------------------------------------------------------------- - logm("Read filename: %s"% main_read_file ) - logm("Un-directional library: %s" % asktag ) - logm("The first base (for mapping): %d" % cut1) - logm("The last base (for mapping): %d" % cut2) - logm("Max. 
lines per mapping: %d"% no_small_lines) - logm("Aligner: %s" % aligner_command) - logm("Reference genome library path: %s" % db_path ) - logm("Number of mismatches allowed: %s" % max_mismatch_no ) + logm("Read filename: %s" % main_read_file) + logm("The first base (for mapping): %d"% cut1 ) + logm("The last base (for mapping): %d"% cut2 ) + + logm("Path for short reads aligner: %s"% aligner_command + '\n') + logm("Reference genome library path: %s"% db_path ) + + if asktag == "Y" : + logm("Un-directional library" ) + else : + logm("Directional library") + + logm("Number of mismatches allowed: %s"% max_mismatch_no ) + if adapter_file !="": - if asktag=="N": - logm("Adapter to be removed from 3' reads: %s"%(adapter.rstrip("\n"))) - elif asktag=="Y": - logm("Adapter to be removed from 3' FW reads: %s"%(adapter_fw.rstrip("\n")) ) - logm("Adapter to be removed from 3' RC reads: %s"%(adapter_rc.rstrip("\n")) ) + logm("Adapter seq: %s" % adapter_fw) + logm("-------------------------------- " ) #---------------------------------------------------------------- # helper method to join fname with tmp_path @@ -109,9 +113,12 @@ #---- Stats ------------------------------------------------------------ all_raw_reads=0 - all_trimed=0 + all_trimmed=0 all_mapped=0 all_mapped_passed=0 + all_base_before_trim=0 + all_base_after_trim=0 + all_base_mapped=0 numbers_premapped_lst=[0,0,0,0] numbers_mapped_lst=[0,0,0,0] @@ -119,7 +126,6 @@ mC_lst=[0,0,0] uC_lst=[0,0,0] - no_my_files=0 #---------------------------------------------------------------- @@ -132,7 +138,7 @@ random_id = ".tmp-"+str(random.randint(1000000,9999999)) #------------------------------------------------------------------- - # undirectional sequencing + # un-directional sequencing #------------------------------------------------------------------- if asktag=="Y": @@ -146,74 +152,70 @@ #---------------------------------------------------------------- # detect format of input file try : - read_inf=open(read_file,"r") + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") except IOError : print "[Error] Cannot open input file : %s" % read_file exit(-1) - oneline=read_inf.readline() - l=oneline.split() - input_format="" - if oneline[0]=="@": # fastq - input_format="fastq" - n_fastq=0 - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="seq" - elif len(l)==11: # qseq - input_format="qseq" - elif oneline[0]==">": # fasta - input_format="fasta" - n_fasta=0 + oneline = read_inf.readline() + l = oneline.split() + input_format = "" + if oneline[0]=="@": + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">": + input_format = "seq" + elif len(l)==11: + input_format = "qseq" + elif oneline[0]==">": + input_format = "fasta" read_inf.close() #---------------------------------------------------------------- - # read sequence, remove adapter and convert - read_id="" - seq="" - seq_ready="N" - for line in fileinput.input(read_file): - l=line.split() - + # read sequence, remove adapter and convert + read_id = "" + seq = "" + seq_ready = "N" + line_no = 0 + for line in fileinput.input(read_file, openhook=fileinput.hook_compressed): # allow input with .gz + l = line.split() + line_no += 1 if input_format=="seq": - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq=l[0] - seq_ready="Y" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[0] + seq_ready = "Y" elif 
input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq == 1 : + all_raw_reads += 1 + read_id = l[0][1:] + seq_ready = "N" + elif l_fastq == 2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq=l[8] - seq_ready="Y" - elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - all_raw_reads+=1 - #read_id=str(all_raw_reads) - read_id=l[0][1:] - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[8] + seq_ready = "Y" + elif input_format=="fasta" : + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + all_raw_reads += 1 + read_id = l[0][1:] + seq = "" + seq_ready = "N" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" #---------------------------------------------------------------- if seq_ready=="Y": @@ -222,13 +224,14 @@ seq=seq.replace(".","N") # striping BS adapter from 3' read + all_base_before_trim += len(seq) if (adapter_fw !="") and (adapter_rc !="") : new_read = RemoveAdapter(seq, adapter_fw, adapter_mismatch) new_read = Remove_5end_Adapter(new_read, adapter_rc) if len(new_read) < len(seq) : - all_trimed += 1 + all_trimmed += 1 seq = new_read - + all_base_after_trim += len(seq) if len(seq)<=4: seq=''.join(["N" for x in xrange(cut2-cut1+1)]) @@ -353,18 +356,18 @@ RC_C2T_uniq_lst=[x[1] for x in RC_C2T_uniq_lst] FW_G2A_uniq_lst=[x[1] for x in FW_G2A_uniq_lst] RC_G2A_uniq_lst=[x[1] for x in RC_G2A_uniq_lst] + #---------------------------------------------------------------- + + numbers_premapped_lst[0] += len(Unique_FW_C2T) + numbers_premapped_lst[1] += len(Unique_RC_G2A) + numbers_premapped_lst[2] += len(Unique_FW_G2A) + numbers_premapped_lst[3] += len(Unique_RC_C2T) del Unique_FW_C2T del Unique_FW_G2A del Unique_RC_C2T del Unique_RC_G2A - #---------------------------------------------------------------- - numbers_premapped_lst[0] += len(Unique_FW_C2T) - numbers_premapped_lst[1] += len(Unique_RC_G2A) - numbers_premapped_lst[2] += len(Unique_FW_G2A) - numbers_premapped_lst[3] += len(Unique_RC_C2T) - #---------------------------------------------------------------- @@ -376,7 +379,6 @@ (FW_G2A_uniq_lst,FW_G2A_U), (RC_C2T_uniq_lst,RC_C2T_U)]: nn += 1 - mapped_chr0 = "" for header in ali_unique_lst: @@ -422,7 +424,9 @@ if len(r_aln)==len(g_aln): N_mismatch = N_MIS(r_aln, g_aln) - if N_mismatch <= int(max_mismatch_no): +# if N_mismatch <= int(max_mismatch_no): + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): numbers_mapped_lst[nn-1] += 1 all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln + next) @@ -436,6 +440,7 @@ XS = 1 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand, mapped_location, cigar, original_BS, methy, XS, output_genome = output_genome) + all_base_mapped += len(original_BS) #---------------------------------------------------------------- logm("--> %s (%d) "%(read_file, no_my_files)) @@ -449,78 +454,74 @@ if asktag=="N": #---------------------------------------------------------------- - outfile2=tmp_d('Trimed_C2T.fa'+random_id) + outfile2=tmp_d('Trimmed_C2T.fa'+random_id) 
outf2=open(outfile2,'w') - - n=0 #---------------------------------------------------------------- try : - read_inf=open(read_file,"r") + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") except IOError : print "[Error] Cannot open input file : %s" % read_file exit(-1) - oneline=read_inf.readline() - l=oneline.split() - input_format="" - if oneline[0]=="@": # FastQ - input_format="fastq" - n_fastq=0 - elif len(l)==1 and oneline[0]!=">": # pure sequences - input_format="seq" - elif len(l)==11: # Illumina GAII qseq file - input_format="qseq" - elif oneline[0]==">": # fasta - input_format="fasta" - n_fasta=0 + oneline = read_inf.readline() + l = oneline.split() + input_format = "" + if oneline[0]=="@": + input_format = "fastq" + elif len(l)==1 and oneline[0]!=">": + input_format = "seq" + elif len(l)==11: + input_format = "qseq" + elif oneline[0]==">": + input_format = "fasta" read_inf.close() + #print "detected data format: %s"%(input_format) #---------------------------------------------------------------- read_id="" seq="" seq_ready="N" - for line in fileinput.input(read_file): - l=line.split() + line_no = 0 + for line in fileinput.input(read_file, openhook=fileinput.hook_compressed): + l = line.split() + line_no += 1 if input_format=="seq": - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq=l[0] - seq_ready="Y" + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[0] + seq_ready = "Y" elif input_format=="fastq": - m_fastq=math.fmod(n_fastq,4) - n_fastq+=1 - seq_ready="N" - if m_fastq==0: - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq="" - elif m_fastq==1: - seq=l[0] - seq_ready="Y" - else: - seq="" + l_fastq = math.fmod(line_no, 4) + if l_fastq == 1 : + all_raw_reads += 1 + read_id = l[0][1:] + seq_ready = "N" + elif l_fastq == 2 : + seq = l[0] + seq_ready = "Y" + else : + seq = "" + seq_ready = "N" elif input_format=="qseq": - all_raw_reads+=1 - read_id=str(all_raw_reads) - read_id=read_id.zfill(12) - seq=l[8] - seq_ready="Y" - elif input_format=="fasta": - m_fasta=math.fmod(n_fasta,2) - n_fasta+=1 - seq_ready="N" - if m_fasta==0: - all_raw_reads+=1 - read_id=l[0][1:] - seq="" - elif m_fasta==1: - seq=l[0] - seq_ready="Y" - else: - seq="" - + all_raw_reads += 1 + read_id = str(all_raw_reads) + read_id = read_id.zfill(12) + seq = l[8] + seq_ready = "Y" + elif input_format=="fasta" : + l_fasta = math.fmod(line_no,2) + if l_fasta==1: + all_raw_reads += 1 + read_id = l[0][1:] + seq = "" + seq_ready = "N" + elif l_fasta==0 : + seq = l[0] + seq_ready = "Y" #-------------------------------- if seq_ready=="Y": seq=seq[cut1-1:cut2] #<---selecting 0..52 from 1..72 -e 52 @@ -528,12 +529,13 @@ seq=seq.replace(".","N") #--striping adapter from 3' read ------- + all_base_before_trim += len(seq) if adapter != "": new_read = RemoveAdapter(seq, adapter, adapter_mismatch) if len(new_read) < len(seq) : - all_trimed += 1 + all_trimmed += 1 seq = new_read - + all_base_after_trim += len(seq) if len(seq)<=4: seq = "N" * (cut2-cut1+1) @@ -612,7 +614,6 @@ outf_MH.close() - FW_C2T_uniq_lst=[[FW_C2T_U[u][1],u] for u in Unique_FW_C2T] RC_C2T_uniq_lst=[[RC_C2T_U[u][1],u] for u in Unique_RC_C2T] FW_C2T_uniq_lst.sort() @@ -633,7 +634,6 @@ chr_length = dict() for ali_unique_lst, ali_dic in [(FW_C2T_uniq_lst,FW_C2T_U),(RC_C2T_uniq_lst,RC_C2T_U)]: nn += 1 - mapped_chr0 = "" for header in ali_unique_lst: _, mapped_chr, 
mapped_location, cigar = ali_dic[header] original_BS = original_bs_reads[header] @@ -641,11 +641,6 @@ if mapped_chr not in gseq : gseq[mapped_chr] = deserialize(db_d(mapped_chr)) chr_length[mapped_chr] = len(gseq[mapped_chr]) - #if mapped_chr != mapped_chr0: - # my_gseq = deserialize(db_d(mapped_chr)) - # chr_length = len(my_gseq) - # mapped_chr0 = mapped_chr - #------------------------------------- r_start, r_end, g_len = get_read_start_end_and_genome_length(cigar) @@ -664,7 +659,8 @@ if len(r_aln) == len(g_aln): N_mismatch = N_MIS(r_aln, g_aln) #+ original_BS_length - (r_end - r_start) # mismatches in the alignment + soft clipped nucleotides - if N_mismatch <= int(max_mismatch_no): + mm_no=float(max_mismatch_no) + if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ): numbers_mapped_lst[nn-1] += 1 all_mapped_passed += 1 methy = methy_seq(r_aln, g_aln+next) @@ -678,6 +674,7 @@ XS = 1 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand, mapped_location, cigar, original_BS, methy, XS, output_genome = output_genome) + all_base_mapped += len(original_BS) #---------------------------------------------------------------- logm("--> %s (%d) "%(read_file,no_my_files)) @@ -686,14 +683,16 @@ #---------------------------------------------------------------- -# outf.close() delete_files(tmp_path) logm("Number of raw reads: %d \n"% all_raw_reads) if all_raw_reads >0: - logm("Number of reads having adapter removed: %d \n" % all_trimed ) - logm("Number of reads rejected because of multiple hits: %d\n" % len(Multiple_hits) ) - logm("Number of unique-hits reads for post-filtering: %d\n" % all_mapped) + logm("Number of bases in total: %d "%all_base_before_trim) + if adapter != "" : + logm("Number of reads having adapter removed: %d \n" % all_trimmed ) + logm("Number of bases after trimming the adapters: %d (%1.3f)"%(all_base_after_trim, float(all_base_after_trim)/all_base_before_trim) ) + logm("Number of reads are rejected because of multiple hits: %d\n" % len(Multiple_hits) ) + logm("Number of unique-hits reads (before post-filtering): %d\n" % all_mapped) if asktag=="Y": logm(" ---- %7d FW reads mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[0]) ) logm(" ---- %7d RC reads mapped to Watson strand (before post-filtering)"%(numbers_premapped_lst[1]) ) @@ -713,6 +712,8 @@ logm(" ---- %7d FW reads mapped to Watson strand"%(numbers_mapped_lst[0]) ) logm(" ---- %7d FW reads mapped to Crick strand"%(numbers_mapped_lst[1]) ) logm("Mappability= %1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads) ) + logm("Total bases of uniquely mapped reads %7d"% all_base_mapped ) + n_CG=mC_lst[0]+uC_lst[0] n_CHG=mC_lst[1]+uC_lst[1] diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_align/output.py --- a/BSseeker2/bs_align/output.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_align/output.py Tue Nov 05 01:55:39 2013 -0500 @@ -81,3 +81,53 @@ ('XG', output_genome)) self.f.write(a) + + + def store2(self, qname, flag, N_mismatch, FR, refname, strand, pos, cigar, original_BS, methy, STEVE, rnext = -1, pnext = -1, qual = None, output_genome = None, + rrbs = False, my_region_serial = -1, my_region_start = 0, my_region_end = 0): + + if self.format == BS_SEEKER1: + + # remove the soft clipped bases from the read + # this is done for backwards compatibility with the old format + r_start, r_end, _ = get_read_start_end_and_genome_length(cigar) + original_BS = original_BS[r_start : r_end] + + if rrbs: + self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\n' % (qname, 
N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, STEVE)) + else: + self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, my_region_serial, my_region_start, my_region_end, STEVE)) + + + elif self.format == BAM or self.format == SAM: + + a = pysam.AlignedRead() + a.qname = qname + a.seq = original_BS if strand == '+' else reverse_compl_seq(original_BS) + a.flag = flag + a.tid = self.chrom_ids[refname] + a.pos = pos + a.mapq = 255 + a.cigar = cigar if strand == '+' else list(reversed(cigar)) + a.rnext = rnext if rnext == -1 else self.chrom_ids[rnext] + a.pnext = pnext + a.qual= qual + if rrbs: + a.tags = (('XO', FR), + ('XS', STEVE), + ('NM', N_mismatch), + ('XM', methy), + ('XG', output_genome), + ('YR', my_region_serial), + ('YS', my_region_start), + ('YE', my_region_end) + ) + + else: + a.tags = (('XO', FR), + ('XS', STEVE), + ('NM', N_mismatch), + ('XM', methy), + ('XG', output_genome)) + + self.f.write(a) diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_index/.___init__.py Binary file BSseeker2/bs_index/.___init__.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_index/._rrbs_build.py Binary file BSseeker2/bs_index/._rrbs_build.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_index/._wg_build.py Binary file BSseeker2/bs_index/._wg_build.py has changed diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_index/wg_build.py --- a/BSseeker2/bs_index/wg_build.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_index/wg_build.py Tue Nov 05 01:55:39 2013 -0500 @@ -3,8 +3,8 @@ def wg_build(fasta_file, build_command, ref_path, aligner): - # ref_path is a string that containts the directory where the reference genomes are stored with - # the input fasta filename appended + # ref_path is a string that contains the directory where the reference genomes are stored with + # the input Fasta filename appended ref_path = os.path.join(ref_path, os.path.split(fasta_file)[1] + '_'+aligner) diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_seeker2-align.py --- a/BSseeker2/bs_seeker2-align.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_seeker2-align.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from optparse import OptionParser, OptionGroup import re @@ -7,44 +7,47 @@ from bs_align.bs_pair_end import * from bs_align.bs_single_end import * from bs_align.bs_rrbs import * -from bs_utils.utils import * +#import re +#from bs_utils.utils import * if __name__ == '__main__': - parser = OptionParser() + parser = OptionParser(usage="Usage: %prog {-i | -1 -2 } -g [options]") # option group 1 opt_group = OptionGroup(parser, "For single end reads") - opt_group.add_option("-i", "--input", type="string", dest="infilename",help="Input your read file name (FORMAT: sequences, fastq, qseq,fasta)", metavar="INFILE") + opt_group.add_option("-i", "--input", type="string", dest="infilename",help="Input read file (FORMAT: sequences, qseq, fasta, fastq). 
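The new store2() method above attaches the RRBS bookkeeping to each alignment as optional SAM tags: YR/YS/YE hold the fragment serial, start and end, next to the existing XO/XS/NM/XM/XG tags. As a usage sketch, those tags can be read back from the resulting BAM; this assumes a recent pysam with the AlignmentFile/get_tag interface, whereas the patch itself writes through the older AlignedRead API:

import pysam

def dump_rrbs_tags(bam_path):
    with pysam.AlignmentFile(bam_path, "rb") as bam:
        for read in bam:
            if read.has_tag("YR"):
                print("%s fragment=%s span=%s-%s methylation=%s" % (
                    read.query_name, read.get_tag("YR"),
                    read.get_tag("YS"), read.get_tag("YE"),
                    read.get_tag("XM")))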
Ex: read.fa or read.fa.gz", metavar="INFILE") parser.add_option_group(opt_group) # option group 2 opt_group = OptionGroup(parser, "For pair end reads") - opt_group.add_option("-1", "--input_1", type="string", dest="infilename_1",help="Input your read file end 1 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") - opt_group.add_option("-2", "--input_2", type="string", dest="infilename_2",help="Input your read file end 2 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") - opt_group.add_option("--minins",type = "int",dest = "min_insert_size", help="The minimum insert size for valid paired-end alignments [Default: %default]", default = -1) - opt_group.add_option("--maxins",type = "int",dest = "max_insert_size", help="The maximum insert size for valid paired-end alignments [Default: %default]", default = 400) + opt_group.add_option("-1", "--input_1", type="string", dest="infilename_1",help="Input read file, mate 1 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") + opt_group.add_option("-2", "--input_2", type="string", dest="infilename_2",help="Input read file, mate 2 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") + opt_group.add_option("-I", "--minins",type = "int",dest = "min_insert_size", help="The minimum insert size for valid paired-end alignments [Default: %default]", default = 0) + opt_group.add_option("-X", "--maxins",type = "int",dest = "max_insert_size", help="The maximum insert size for valid paired-end alignments [Default: %default]", default = 500) parser.add_option_group(opt_group) # option group 3 opt_group = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options") - opt_group.add_option("-r", "--rrbs", action="store_true", dest="rrbs", default = False, help = 'Process reads from Reduced Representation Bisulfite Sequencing experiments') + opt_group.add_option("-r", "--rrbs", action="store_true", dest="rrbs", default = False, help = 'Map reads to the Reduced Representation genome') opt_group.add_option("-c", "--cut-site", type="string",dest="cut_format", help="Cutting sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). 
[Default: %default]", metavar="pattern", default = "C-CGG") - opt_group.add_option("-L", "--low", type = "int", dest="rrbs_low_bound", help="lower bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 40) - opt_group.add_option("-U", "--up", type = "int", dest="rrbs_up_bound", help="upper bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 500) + opt_group.add_option("-L", "--low", type = "int", dest="rrbs_low_bound", help="Lower bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 20) + opt_group.add_option("-U", "--up", type = "int", dest="rrbs_up_bound", help="Upper bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 500) parser.add_option_group(opt_group) # option group 4 opt_group = OptionGroup(parser, "General options") opt_group.add_option("-t", "--tag", type="string", dest="taginfo",help="[Y]es for undirectional lib, [N]o for directional [Default: %default]", metavar="TAG", default = 'N') - opt_group.add_option("-s","--start_base",type = "int",dest = "cutnumber1", help="The first base of your read to be mapped [Default: %default]", default = 1) - opt_group.add_option("-e","--end_base",type = "int",dest = "cutnumber2", help="The last cycle number of your read to be mapped [Default: %default]", default = 200) - opt_group.add_option("-a", "--adapter", type="string", dest="adapter_file",help="Input text file of your adaptor sequences (to be trimed from the 3'end of the reads). Input 1 seq for dir. lib., 2 seqs for undir. lib. One line per sequence", metavar="FILE", default = '') - opt_group.add_option("--am",type = "int",dest = "adapter_mismatch", help="Number of mismatches allowed in adaptor [Default: %default]", default = 0) - opt_group.add_option("-g", "--genome", type="string", dest="genome",help="Name of the reference genome (the same as the reference genome file in the preprocessing step) [ex. chr21_hg18.fa]") - opt_group.add_option("-m", "--mismatches",type = "int", dest="int_no_mismatches",help="Number of mismatches in one read [Default: %default]", default = 4) - opt_group.add_option("--aligner", dest="aligner",help="Aligner program to perform the analisys: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE2) - opt_group.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Defaults: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), + opt_group.add_option("-s","--start_base",type = "int",dest = "cutnumber1", help="The first cycle of the read to be mapped [Default: %default]", default = 1) + opt_group.add_option("-e","--end_base",type = "int",dest = "cutnumber2", help="The last cycle of the read to be mapped [Default: %default]", default = 200) + opt_group.add_option("-a", "--adapter", type="string", dest="adapter_file",help="Input text file of your adaptor sequences (to be trimmed from the 3'end of the reads, ). " + "Input one seq for dir. lib., twon seqs for undir. lib. One line per sequence. " + "Only the first 10bp will be used", metavar="FILE", default = '') + opt_group.add_option("--am",type = "int",dest = "adapter_mismatch", help="Number of mismatches allowed in adapter [Default: %default]", default = 0) + opt_group.add_option("-g", "--genome", type="string", dest="genome",help="Name of the reference genome (should be the same as \"-f\" in bs_seeker2-build.py ) [ex. 
chr21_hg18.fa]") + opt_group.add_option("-m", "--mismatches",type = "float", dest="no_mismatches",help="Number of mismatches in one read [Default: %default]", default = 4) + opt_group.add_option("--aligner", dest="aligner",help="Aligner program for short reads mapping: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE) + opt_group.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Detected: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), metavar="PATH" ) opt_group.add_option("-d", "--db", type="string", dest="dbpath",help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]" , metavar="DBPATH", default = reference_genome_path) @@ -52,7 +55,7 @@ opt_group.add_option("-o", "--output", type="string", dest="outfilename",help="The name of output file [INFILE.bs(se|pe|rrbs)]", metavar="OUTFILE") opt_group.add_option("-f", "--output-format", type="string", dest="output_format",help="Output format: "+', '.join(output.formats)+" [Default: %default]", metavar="FORMAT", default = output.BAM) opt_group.add_option("--no-header", action="store_true", dest="no_SAM_header",help="Suppress SAM header lines [Default: %default]", default = False) - opt_group.add_option("--temp_dir", type="string", dest="temp_dir",help="The path to your temporary directory [Default: %default]", metavar="PATH", default = tempfile.gettempdir()) + opt_group.add_option("--temp_dir", type="string", dest="temp_dir",help="The path to your temporary directory [Detected: %default]", metavar="PATH", default = tempfile.gettempdir()) opt_group.add_option("--XS",type = "string", dest="XS_filter",help="Filter definition for tag XS, format X,Y. X=0.8 and y=5 indicate that for one read, if #(mCH sites)/#(all CH sites)>0.8 and #(mCH sites)>5, then tag XS=1; or else tag XS=0. 
[Default: %default]", default = "0.5,5") # added by weilong opt_group.add_option("--multiple-hit", action="store_true", dest="Output_multiple_hit", default = False, help = 'Output reads with multiple hits to file\"Multiple_hit.fa\"') @@ -96,7 +99,7 @@ # if no options were given by the user, print help and exit if len(sys.argv) == 1: - print parser.print_help() + parser.print_help() exit(0) if options.version : @@ -112,6 +115,38 @@ if not (options.infilename or (options.infilename_1 and options.infilename_2)): error('You should set either -i or -1 and -2 options.') + + # Calculate the length of read + if options.infilename : + read_file = options.infilename + elif options.infilename_1 : + read_file = options.infilename_1 + else : + error('You should at least specify -i or -1 options.') + + try : + if read_file.endswith(".gz") : # support input file ending with ".gz" + read_inf = gzip.open(read_file, "rb") + else : + read_inf=open(read_file,"r") + except IOError : + print "[Error] Cannot open input file : %s" % read_file + exit(-1) + oneline = read_inf.readline() + oneline = read_inf.readline() # get the second line + read_len = min(len(oneline), (options.cutnumber2-options.cutnumber1)) + read_inf.close() + # mismatch allowed: bowtie 1,build-in parameter '-m'; bowtie 2, post-filter paramter + # mismatch should no greater than the read length + no_mismatches = float(options.no_mismatches) + if (no_mismatches < 1) : + int_no_mismatches=int(no_mismatches * read_len) + else : + int_no_mismatches=int(no_mismatches) + + str_no_mismatches=str(options.no_mismatches) # pass to specific mode + + # -t, directional / un-directional library asktag=str(options.taginfo).upper() if asktag not in 'YN': @@ -121,10 +156,8 @@ error('-a option should be: %s' % ' ,'.join(supported_aligners)+'.') # path for aligner aligner_exec = os.path.expanduser( os.path.join(options.aligner_path or aligner_path[options.aligner], options.aligner) ) - # mismatch allowed: bowtie 1,build-in parameter '-m'; bowtie 2, post-filter paramter - # mismatch should no greater than the read length - int_no_mismatches=min(options.int_no_mismatches, options.cutnumber2-options.cutnumber1) - str_no_mismatches=str(int_no_mismatches) + + # -g if options.genome is None: error('-g is a required option') @@ -149,19 +182,17 @@ db_path = os.path.expanduser(os.path.join(options.dbpath, genome_subdir + '_' + options.aligner)) + if not os.path.isdir(db_path): error('Index DIR \"' + genome_subdir + '..\" cannot be found in ' + options.dbpath +'.\n\tPlease run the bs_seeker2-build.py ' 'to create it with the correct parameters for -g, -r, --low, --up and --aligner.') - # handle aligner options - # - # default aligner options aligner_options_defaults = { BOWTIE : { '-e' : 40*int_no_mismatches, '--nomaqround' : True, '--norc' : True, - '-k' : 2, + #'-k' : 2, # -k=2; report two best hits, and filter by error rates '--quiet' : True, '--best' : True, @@ -179,7 +210,7 @@ # run bowtie2 in local mode by default '--local' : '--end-to-end' not in aligner_options, #'--mm' : True, - '-k' : 2 + #'-k' : 2 }, SOAP : { '-v' : int_no_mismatches, '-p' : 2, @@ -238,13 +269,22 @@ else: aligner_title = aligner_title + "-local" + if options.aligner == BOWTIE : + logm("Mode: Bowtie") + elif options.aligner == BOWTIE2 : + if '--end-to-end' not in aligner_options : + logm("Mode: Bowtie2, local alignment") + else : + logm("Mode: Bowtie2, end-to-end alignment") + + tmp_path = tempfile.mkdtemp(prefix='bs_seeker2_%s_-%s-TMP-' % (os.path.split(outfilename)[1], aligner_title ), dir = 
options.temp_dir) (XS_x, XS_y) = options.XS_filter.split(",") XS_pct = float(XS_x) XS_count = int(XS_y) - logm('Filter for tag XS: #(mCH)/#(all CH)>%f and #(mCH)>%d' % (XS_pct, XS_count)) + logm('Filter for tag XS: #(mCH)/#(all CH)>%.2f%% and #(mCH)>%d' % (XS_pct*100, XS_count)) logm('Temporary directory: %s' % tmp_path) @@ -253,8 +293,8 @@ logm('Single end') aligner_command = aligner_exec + aligner_options_string() + \ - { BOWTIE : ' %(reference_genome)s -f %(input_file)s %(output_file)s', - BOWTIE2 : ' -x %(reference_genome)s -f -U %(input_file)s -S %(output_file)s', + { BOWTIE : ' -k 2 %(reference_genome)s -f %(input_file)s %(output_file)s', + BOWTIE2 : ' -k 2 -x %(reference_genome)s -f -U %(input_file)s -S %(output_file)s', SOAP : ' -D %(reference_genome)s.fa.index -o %(output_file)s -a %(input_file)s', RMAP : ' -c %(reference_genome)s.fa -o %(output_file)s %(input_file)s' }[options.aligner] @@ -263,7 +303,6 @@ if options.rrbs: # RRBS scan bs_rrbs(options.infilename, asktag, - # options.rrbs_taginfo, options.adapter_file, options.cutnumber1, options.cutnumber2, @@ -299,18 +338,18 @@ else: logm('Pair end') # pair end specific default options - aligner_options = dict({BOWTIE: {'--ff' : asktag == 'N', - '--fr' : asktag == 'Y', + aligner_options = dict({BOWTIE: {'--fr' : True, '-X' : options.max_insert_size, - '-I' : options.min_insert_size if options.min_insert_size > 0 else None + '-I' : options.min_insert_size if options.min_insert_size > 0 else None, + '-a' : True # "-k 2" in bowtie would not report the best two }, BOWTIE2 : { - '--ff' : asktag == 'N', - '--fr' : asktag == 'Y', + '--fr' : True, '-X' : options.max_insert_size, '-I' : options.min_insert_size if options.min_insert_size > 0 else None, '--no-discordant' : True, - '--no-mixed' : True + '--no-mixed' : True, + '-k' : 2 }, SOAP: { '-x' : options.max_insert_size, @@ -328,6 +367,11 @@ logm('Aligner command: %s' % aligner_command) + if '--end-to-end' not in aligner_options: + aligner_options_defaults[BOWTIE2].update({'-D' : 50}) + else: + aligner_options_defaults[BOWTIE2].update({'-D' : 50, '-L': 15, '--score-min': 'L,-0.6,-0.6' }) + bs_pair_end(options.infilename_1, options.infilename_2, asktag, @@ -342,6 +386,7 @@ outfile, XS_pct, XS_count, + options.adapter_mismatch, options.Output_multiple_hit ) diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_seeker2-build.py --- a/BSseeker2/bs_seeker2-build.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_seeker2-build.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import os from optparse import OptionParser, OptionGroup @@ -12,8 +12,8 @@ parser = OptionParser() parser.add_option("-f", "--file", dest="filename", help="Input your reference genome file (fasta)", metavar="FILE") - parser.add_option("--aligner", dest="aligner", help="Aligner program to perform the analysis: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE2) - parser.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Defaults: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), + parser.add_option("--aligner", dest="aligner", help="Aligner program to perform the analysis: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE) + parser.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. 
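The single-end aligner commands above are plain %-format templates filled from a dict at run time; -k 2 is spelled out in the template so the two best hits are reported and left to the post-filter, as the commented-out '-k' defaults note. A rendering sketch with placeholder paths (the real file names are generated under the temporary directory):

template = " -k 2 -x %(reference_genome)s -f -U %(input_file)s -S %(output_file)s"
cmd = "bowtie2" + template % {
    "reference_genome": "/path/to/db/W_C2T",
    "input_file":       "/tmp/Trimmed_C2T.fa.tmp-1234567",
    "output_file":      "/tmp/W_C2T_m4.mapping.tmp-1234567",
}
print(cmd)
# bowtie2 -k 2 -x /path/to/db/W_C2T -f -U /tmp/Trimmed_C2T.fa.tmp-1234567 -S /tmp/W_C2T_m4.mapping.tmp-1234567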
Detected: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), metavar="PATH") parser.add_option("-d", "--db", type="string", dest="dbpath", help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]", metavar="DBPATH", default = reference_genome_path) @@ -23,7 +23,7 @@ rrbs_opts = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options", "Use this options with conjuction of -r [--rrbs]") rrbs_opts.add_option("-r", "--rrbs", action="store_true", dest="rrbs", help = 'Build index specially for Reduced Representation Bisulfite Sequencing experiments. Genome other than certain fragments will be masked. [Default: %default]', default = False) - rrbs_opts.add_option("-l", "--low",type= "int", dest="low_bound", help="lower bound of fragment length (excluding recognition sequence such as C-CGG) [Default: %default]", default = 40) + rrbs_opts.add_option("-l", "--low",type= "int", dest="low_bound", help="lower bound of fragment length (excluding recognition sequence such as C-CGG) [Default: %default]", default = 20) rrbs_opts.add_option("-u", "--up", type= "int", dest="up_bound", help="upper bound of fragment length (excluding recognition sequence such as C-CGG ends) [Default: %default]", default = 500) rrbs_opts.add_option("-c", "--cut-site", type= "string", dest="cut_format", help="Cut sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", default = "C-CGG") parser.add_option_group(rrbs_opts) @@ -33,7 +33,7 @@ # if no options were given by the user, print help and exit if len(sys.argv) == 1: - print parser.print_help() + parser.print_help() exit(0) if options.version : @@ -44,7 +44,11 @@ rrbs = options.rrbs - fasta_file=os.path.expanduser(options.filename) + if options.filename is not None : + fasta_file=os.path.expanduser(options.filename) + else : + error("Please specify the genome file (Fasta) using \"-f\"") + if fasta_file is None: error('Fasta file for the reference genome must be supported') @@ -69,9 +73,14 @@ print "Reference genome file: %s" % fasta_file print "Reduced Representation Bisulfite Sequencing: %s" % rrbs + print "Short reads aligner you are using: %s" % options.aligner print "Builder path: %s" % builder_exec + #--------------------------------------------------------------- + if not os.path.isfile( builder_exec ) : + error("Cannot file program %s for execution." % builder_exec) + ref_path = options.dbpath if os.path.exists(ref_path): diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_seeker2-call_methylation.py --- a/BSseeker2/bs_seeker2-call_methylation.py Fri Jul 12 18:47:28 2013 -0400 +++ b/BSseeker2/bs_seeker2-call_methylation.py Tue Nov 05 01:55:39 2013 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from optparse import OptionParser, OptionGroup from bs_utils.utils import * @@ -71,7 +71,7 @@ # if no options were given by the user, print help and exit if len(sys.argv) == 1: - print parser.print_help() + parser.print_help() exit(0) if options.version : @@ -133,8 +133,6 @@ nuc, context, subcontext = context_calling(chrom_seq, col.pos) total_reads = 0 - - for pr in col.pileups: # print pr if (not pr.indel) : # skip indels @@ -152,7 +150,6 @@ print 'WARNING: read %s has an invalid alignment. Discarding.. 
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_seeker2-call_methylation.py
--- a/BSseeker2/bs_seeker2-call_methylation.py	Fri Jul 12 18:47:28 2013 -0400
+++ b/BSseeker2/bs_seeker2-call_methylation.py	Tue Nov 05 01:55:39 2013 -0500
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 from optparse import OptionParser, OptionGroup
 from bs_utils.utils import *
@@ -71,7 +71,7 @@
     # if no options were given by the user, print help and exit
     if len(sys.argv) == 1:
-        print parser.print_help()
+        parser.print_help()
         exit(0)
     if options.version :
@@ -133,8 +133,6 @@
         nuc, context, subcontext = context_calling(chrom_seq, col.pos)
         total_reads = 0
-
-
         for pr in col.pileups:
             # print pr
             if (not pr.indel) : # skip indels
@@ -152,7 +150,6 @@
                     print 'WARNING: read %s has an invalid alignment. Discarding..' % pr.alignment.qname
                     continue
                 read_nuc = pr.alignment.seq[pr.qpos]
-                # print "read_nuc=", read_nuc
                 if pr.alignment.is_reverse:
                     ATCG_rev[read_nuc] += 1
                 else:
@@ -194,7 +191,8 @@
             wiggle.write('%d\t%f\n' % (pos, meth_level))
         else :
             wiggle.write('%d\t-%f\n' % (pos, meth_level))
-        CGmap.write('%(chrom)s\t%(nuc)s\t%(pos)d\t%(context)s\t%(subcontext)s\t%(meth_level_string)s\t%(meth_cytosines)s\t%(all_cytosines)s\n' % locals())
+
+        CGmap.write('%(chrom)s\t%(nuc)s\t%(pos)d\t%(context)s\t%(subcontext)s\t%(meth_level_string)s\t%(meth_cytosines)s\t%(all_cytosines)s\n' % locals())
     ATCGmap.close()
     CGmap.close()
     wiggle.close()
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_utils/.___init__.py
Binary file BSseeker2/bs_utils/.___init__.py has changed
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_utils/._utils.py
Binary file BSseeker2/bs_utils/._utils.py has changed
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_utils/__init__.py
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/bs_utils/utils.py
--- a/BSseeker2/bs_utils/utils.py	Fri Jul 12 18:47:28 2013 -0400
+++ b/BSseeker2/bs_utils/utils.py	Tue Nov 05 01:55:39 2013 -0500
@@ -23,7 +23,7 @@
 def show_version() :
     print ""
-    print " BS-Seeker2 v2.0.3 - May 19, 2013 "
+    print " BS-Seeker2 v2.0.5 - Nov 5, 2013 "
     print ""
@@ -135,14 +135,15 @@
                     SOAP : '--soap-',
                     RMAP : '--rmap-' }
-aligner_path = dict((aligner, os.path.expanduser(find_location(aligner) or default_path))
-                    for aligner, default_path in
-                    [(BOWTIE,'~/bowtie-0.12.7/'),
-                     (BOWTIE2, '~/bowtie-0.12.7/'),
-                     (SOAP, '~/soap2.21release/'),
-                     (RMAP, '~/rmap_v2.05/bin')
-                    ])
-
+#aligner_path = dict((aligner, os.path.expanduser(find_location(aligner) or default_path))
+#                    for aligner, default_path in
+#                    [(BOWTIE,'~/bowtie/'),
+#                     (BOWTIE2, '~/bowtie2/'),
+#                     (SOAP, '~/soap/'),
+#                     (RMAP, '~/rmap/bin')
+#                    ])
+aligner_path = dict((aligner, os.path.expanduser(find_location(aligner) or "None"))
+                    for aligner in [(BOWTIE), (BOWTIE2), (SOAP), (RMAP)])
 reference_genome_path = os.path.join(os.path.split(globals()['__file__'])[0],'reference_genomes')
@@ -214,7 +215,10 @@
     """ Splits a file (equivalend to UNIX split -l ) """
     fno = 0
     lno = 0
-    INPUT = open(filename, 'r')
+    if filename.endswith(".gz") :
+        INPUT = gzip.open(filename, 'rb')
+    else :
+        INPUT = open(filename, 'r')
     output = None
     for l in INPUT:
         if lno == 0:
@@ -321,7 +325,10 @@
     commands = [(cmd[0], open(cmd[1], 'w')) if type(cmd) is tuple else (cmd, None) for cmd in commands]
-    logm('Starting commands:\n' + '\n'.join([cmd for cmd, stdout in commands]))
+    #logm('Starting commands:\n' + '\n'.join([cmd for cmd, stdout in commands]))
+    logm('Starting commands:')
+    for cmd, stdout in commands :
+        logm("Launched: "+cmd)
     for i, proc in enumerate([subprocess.Popen(args = shlex.split(cmd), stdout = stdout) for cmd, stdout in commands]):
         return_code = proc.wait()
         logm('Finished: ' + commands[i][0])
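The splitter change above appears to be what lets v2.0.5 read gzip-compressed FASTA/FASTQ input directly: it simply picks gzip.open for ".gz" files. A standalone sketch of the same open-by-extension idea (the helper name is illustrative, not part of bs_utils):

import gzip

def open_maybe_gzipped(filename):
    # Choose gzip.open for ".gz" inputs and plain open otherwise,
    # mirroring the patched split-file logic in bs_utils/utils.py.
    if filename.endswith(".gz"):
        return gzip.open(filename, 'rb')
    return open(filename, 'r')

# e.g. for line in open_maybe_gzipped("reads.fq.gz"): ...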
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/galaxy/.___init__.py
Binary file BSseeker2/galaxy/.___init__.py has changed
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/galaxy/._bs_seeker2_wrapper.py
Binary file BSseeker2/galaxy/._bs_seeker2_wrapper.py has changed
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/galaxy/._bs_seeker2_wrapper.xml
Binary file BSseeker2/galaxy/._bs_seeker2_wrapper.xml has changed
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/galaxy/__init__.py
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/galaxy/bs_seeker2_wrapper.py
--- a/BSseeker2/galaxy/bs_seeker2_wrapper.py	Fri Jul 12 18:47:28 2013 -0400
+++ b/BSseeker2/galaxy/bs_seeker2_wrapper.py	Tue Nov 05 01:55:39 2013 -0500
@@ -114,7 +114,6 @@
                  ('_rrbs_%s_%s' % (getopt(args[ALIGN], '-l', '--low', '40'), getopt(args[ALIGN], '-u', '--up', '500')) if len(set(['-r', '--rrbs']) & set(args[ALIGN])) > 0 else '') +
-                 '_' + args[ALIGN]['--aligner'])
         })
     run_prog(os.path.join(path_to_bs_seeker, 'bs_seeker2-call_methylation.py'), args[CALL_METHYLATION])
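The getopt(...) calls in the context line above pull an option's value out of the parsed Galaxy arguments by short or long name, falling back to a default. A minimal stand-in with that behaviour (the implementation below is an assumption for illustration, not the wrapper's actual helper):

def getopt(opts, short_name, long_name, default):
    # Return the value for whichever of the two option names is present, else the default.
    if short_name in opts:
        return opts[short_name]
    if long_name in opts:
        return opts[long_name]
    return default

# e.g. getopt({'-l': '20'}, '-l', '--low', '40') -> '20'
#      getopt({},           '-l', '--low', '40') -> '40'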
diff -r e6df770c0e58 -r 8b26adf64adc BSseeker2/galaxy/bs_seeker2_wrapper.xml
--- a/BSseeker2/galaxy/bs_seeker2_wrapper.xml	Fri Jul 12 18:47:28 2013 -0400
+++ b/BSseeker2/galaxy/bs_seeker2_wrapper.xml	Tue Nov 05 01:55:39 2013 -0500
@@ -1,255 +1,270 @@
 [The XML markup of this Galaxy tool definition did not survive in this copy of the patch; the recoverable text content is shown once below, with the lines added in V2.0.5 marked '+'.]
 bs_seeker2
 Versatile aligner for bisulfite sequencing data
 bs_seeker2_wrapper.py
        ### define exec path
        ### --exec-path "/u/home/galaxy/galaxy/GalaxyTools/bin"
        ### [Please change the following path to your local directory]
        --exec-path "/Users/weilong/Documents/program/BSseeker2"
        ### output
        --align--output $align_output
        --call_methylation--wig $call_methylation_wig
        --call_methylation--CGmap $call_methylation_CGmap
        --call_methylation--ATCGmap $call_methylation_ATCGmap
        --call_methylation--txt

        #if $singlePaired.sPaired == "paired"
            --align--input_1 $input1
            --align--input_2 $singlePaired.input2
        #end if

        ### aligner
        --align--aligner ${choosealigner.aligner}
+       #if $choosealigner.aligner == "bowtie2"
+           #if $choosealigner.mode_type == "local"
+               --align--bt2--local
+           #else
+               --align--bt2--end-to-end
+           #end if
+       #end if

        ### Index from history or built-in
        #if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "history"
            --build--file ${choosealigner.rrbsFragments.refGenomeSource.ownFile}
            --build--aligner ${choosealigner.aligner}
            --align-g ${choosealigner.rrbsFragments.refGenomeSource.ownFile}
            --align--db ${choosealigner.rrbsFragments.refGenomeSource.ownFile}
        #else if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "indexed"
            --align--db ${choosealigner.rrbsFragments.refGenomeSource.index.fields.path}
            --align-g ${choosealigner.rrbsFragments.refGenomeSource.index.fields.path}/${choosealigner.rrbsFragments.refGenomeSource.index.fields.dbkey}.fa
        #end if

        ### RRBS or WGBS
        #if $choosealigner.rrbsFragments.Fragmented == "Yes"
            #if $choosealigner.rrbsFragments.refGenomeSource.genomeSource == "history"
                --build--rrbs
                --build--low ${choosealigner.rrbsFragments.lowerBound}
                --build--up ${choosealigner.rrbsFragments.upperBound}
            #end if
            --align--rrbs
            --align--low ${choosealigner.rrbsFragments.lowerBound}
            --align--up ${choosealigner.rrbsFragments.upperBound}
        #end if

        ### Inputs
        #if $singlePaired.sPaired == "single"
            --align-i $input1
        #end if

        ### Library type
        --align-t $tag

        ### other general options
        #if $sParams.sSettingsType == "preSet"
            --align--start_base 1
            --align--end_base 200
            --align--mis 4
        #end if

        ### adapter information
        #if $adapterInfo.useAdapter == "Yes"
            --align--adapter ${adapterInfo.adapter_file}
        #end if

        #if $sParams.sSettingsType == "full"
            --align--start_base ${sParams.start_base}
            --align--end_base ${sParams.end_base}
            --align--mis ${sParams.num_mismatch}
        #end if

 [input, parameter, and output element definitions not recoverable]

**What it does**

BS-Seeker2 is a seamlessly pipeline for mapping bisulfite sequencing data and generating detailed DNA methylome. BS-Seeker2 improves mappability by using local alignment, and is tailored for RRBS library by building special index, with higher efficiency and accuracy. This is the Galaxy version of BS-Seeker2.

------

**Resources**

The homepage for BS-Seeker2 is http://pellegrini.mcdb.ucla.edu/BS_Seeker2/.

For more information of BS-Seeker2, please refer to https://github.com/BSSeeker/BSseeker2.

------

**Example**

- Adapter file::

    AGATCGGAAGAGCACACGTC
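The only functional change in the rewritten wrapper command is the new bowtie2 mode switch, which forwards either --align--bt2--local or --align--bt2--end-to-end to bs_seeker2_wrapper.py. Read together with the bs_seeker2-align.py hunk earlier in this changeset, the per-mode bowtie2 defaults can be sketched as follows, under the assumption that these wrapper flags simply toggle bowtie2's own --local / --end-to-end alignment modes:

def bowtie2_mode_defaults(end_to_end):
    # Defaults added in this changeset for BOWTIE2 (see the bs_seeker2-align.py hunk):
    #   local mode:      -D 50
    #   end-to-end mode: -D 50, -L 15, --score-min L,-0.6,-0.6
    if end_to_end:
        return {'--end-to-end': True, '-D': 50, '-L': 15, '--score-min': 'L,-0.6,-0.6'}
    return {'--local': True, '-D': 50}

# e.g. bowtie2_mode_defaults(False) -> {'--local': True, '-D': 50}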