view notes.txt @ 0:0475e4175855 draft default tip

planemo upload commit 81ece2551cea27cbd0e718ef5b7a2fe8d4abd071-dirty
author yqiancolumbia
date Mon, 30 Apr 2018 05:25:11 -0400
parents
children
line wrap: on
line source

https://galaxyproject.org/admin/data-integration/

su -l galaxy

prerequisites
-------------
cd tools
ln -s /home/yq2139/czlab_src/CTK/ CTK
ln -s /home/yq2139/czlab_src/ngs/ ngs
ln -s /home/yq2139/czlab_src/plib/ plib
rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/tools/fastx_toolkit_0.0.14/ .
#rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/tools/bwa-0.7.12/ .

# IMPORTANT add /home/galaxy/tools/fastx_toolkit_0.0.14/bin to PATH in ~/.bash_profile 
# same for samtools installed later

cd /home/galaxy/galaxy_data
rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/genomes/mm10/bwa/* genomes/mm10/bwa/
rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/genomes/hg19/bwa/* genomes/hg19/bwa/
rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/mm10/annotation/rmsk.bed /home/galaxy/galaxy_data/genomes/mm10/annotation/
rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/mm10/annotation/rmsk.RNA.bed /home/galaxy/galaxy_data/genomes/mm10/annotation/
rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/hg19/annotation/rmsk.RNA.bed /home/galaxy/galaxy_data/genomes/hg19/annotation/
rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/hg19/annotation/rmsk.bed /home/galaxy/galaxy_data/genomes/hg19/annotation/

mkdir /home/galaxy/galaxy_data/cache
# to temporarily store cache for fastq2collapse.pl etc.

install bwa + bam2sam
---------------------
# ref https://biostar.usegalaxy.org/p/24896/#24919 
# steps: http://intron.c2b2.columbia.edu:8888/ - admin - search tool shed - galaxy main tool shed - bwa - preview and install
# when uninstalling tools, one needs to remove the corresponding directories and “reset metadata” in the admin section

# modify tool-data/bwa_mem_index.loc : 
	hg19bwa hg19    hg19  /home/galaxy/galaxy_data/genomes/hg19/bwa/hg19.fa
	mm10bwa mm10    mm10  /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa

# modify integrated_tool_panel.xml to change showing order

# add these below (taken from galaxy/config/shed_tool_conf.xml) to config/tool_conf.xml :

    <tool file="../shed_tools/toolshed.g2.bx.psu.edu/repos/devteam/bwa/4d82cf59895e/bwa/bwa.xml" guid="toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.16.2">
      <tool_shed>toolshed.g2.bx.psu.edu</tool_shed>
        <repository_name>bwa</repository_name>
        <repository_owner>devteam</repository_owner>
        <installed_changeset_revision>4d82cf59895e</installed_changeset_revision>
        <id>toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.16.2</id>
        <version>0.7.16.2</version>
    </tool>

    <tool file="../shed_tools/toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/af7c50162f0b/bam_to_sam/bam_to_sam.xml" guid="toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/bam_to_sam/2.0">
      <tool_shed>toolshed.g2.bx.psu.edu</tool_shed>
        <repository_name>bam_to_sam</repository_name>
        <repository_owner>devteam</repository_owner>
        <installed_changeset_revision>af7c50162f0b</installed_changeset_revision>
        <id>toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/bam_to_sam/2.0</id>
        <version>2.0</version>
    </tool>


problems fixed
---------------
# (done) redirect stderr to stdout (my $msgio = $outBedFile eq '-' ? *STDERR :  *STDOUT;) : fastq_filter.pl stripBarcode.pl parseAlignment.pl fastq2collapse.pl  tag2peak.pl tag2profile.pl CIMS.pl tag2cluster.pl
# (no need if correctly added environment variables) add a new data type "sai"; ref https://biostar.usegalaxy.org/p/24983/ and  https://galaxyproject.org/admin/datatypes/adding-datatypes/
	step1 add the line below to the datatypes section in ./config/datatypes_conf.xml.sample file:
		<datatype extension="sai" type="galaxy.datatypes.binary:Sai" subclass="True"/>
	step2 In binary.py file, add:
		class Sai( Binary ):
		    """Class describing a Sai file"""
		    file_ext = "sai"
	step3 In registry.py file, add:
		'sai' : binary.Sai(),
# (done) bed2annotation.pl problem: "cat: write error: Broken pipe" 
# (done) bedExt.pl problem: /data/galaxy/database/files/000/dataset_241.dat already exists

to upload file >2GB 
--------------------
# upload the file using rsync or scp to the c2b2 server directory /ifs/scratch/c2b2/cz_lab/web_data/galaxy_tmp
# Then specify the link to the file in galaxy following the example below:
# https://zhanglab.c2b2.columbia.edu/data/galaxy_tmp/HepG2.RBFOX2.rep1.R2.fastq.gz 


test
----
cd /home/galaxy/galaxy_test/ctk
rsync -avzP yq2139@intron.c2b2.columbia.edu://mnt/chromatin/archive_proj/cz2294/CLIP_comparison/BrainRbfox/fastq/Fox1_1.fastq.gz .
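# Fox1_1.fastq.gz is std; Fox1_3.fastq.gz is brdu
# NOTE: the commands below use Fox1_3.fastq.gz, which the rsync above does not fetch (presumably copied from the same fastq directory - confirm)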

gzip -cd Fox1_3.fastq.gz |head -1000000 > Fox1_3.1000000lines.raw.fastq
perl /home/galaxy/tools/CTK/fastq_filter.pl -v -if sanger -f mean:0-38:20 -maxN -1 -of fastq Fox1_3.1000000lines.raw.fastq Fox1_3.1000000lines.filtered.fastq >& fastqfilter.log
fastx_clipper -a TCGTATGCCGTCTTCTGCTTG  -l 29 -n -v -i Fox1_3.1000000lines.filtered.fastq -o Fox1_3.1000000lines.trim1.fastq >& fastx_clipper.log
fastq_quality_trimmer -i Fox1_3.1000000lines.trim1.fastq -v -t 5 -l 29 -o Fox1_3.1000000lines.trim2.fastq >& fastq_quality_trimmer.log
perl /home/galaxy/tools/CTK/fastq2collapse.pl Fox1_3.1000000lines.trim2.fastq -v Fox1_3.1000000lines.trim2.c.fastq >& fastq2collapse.log
perl /home/galaxy/tools/CTK/stripBarcode.pl -v -format fastq -len 14 Fox1_3.1000000lines.trim2.c.fastq Fox1_3.1000000lines.trim2.c.tag.fastq >& stripBarcode.log
/home/galaxy/tools/bwa-0.7.12/bwa aln -t 4 -n 0.06 -q 20 /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa Fox1_3.1000000lines.trim2.c.tag.fastq > Fox1_3.1000000lines.sai 
# cannot write a log for bwa aln, or else the sai file is empty
/home/galaxy/tools/bwa-0.7.12/bwa samse  /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa Fox1_3.1000000lines.sai Fox1_3.1000000lines.trim2.c.tag.fastq > Fox1_3.1000000lines.sam
perl /home/galaxy/tools/CTK/parseAlignment.pl -v --map-qual 1 --min-len 18 --mutation-file Fox1_3.1000000lines.mutation.txt Fox1_3.1000000lines.sam Fox1_3.1000000lines.tag.bed >& parseAlignment.log
perl /home/galaxy/tools/CTK/tag2collapse.pl -v -big --random-barcode -EM 30 --seq-error-model alignment -weight --weight-in-name --keep-max-score --keep-tag-name Fox1_3.1000000lines.tag.bed Fox1_3.1000000lines.tag.uniq.bed >& tag2collapse.log
perl /home/galaxy/tools/CTK/selectRow.pl -q 3 -f 3 Fox1_3.1000000lines.mutation.txt  Fox1_3.1000000lines.tag.uniq.bed   > Fox1_3.1000000lines.tag.uniq.mutation.txt
perl /home/galaxy/tools/CTK/bed2annotation.pl -conf /home/galaxy/tools/CTK/annotation.loc -dbkey mm10 -ss -big -region  -v Fox1_3.1000000lines.tag.uniq.bed  Fox1_3.1000000lines.tag.uniq.annot.txt >& annot.log
perl /home/galaxy/tools/CTK/tag2peak.pl -big -ss -v --valley-seeking -p 0.05 --valley-depth 0.9 --dbkey mm10 --multi-test Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.pool.tag.uniq.peak.sig.bed >& tag2peak.log
perl /home/galaxy/tools/CTK/tag2profile.pl -v -ss -exact -of bedgraph  Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.bedgraph >& bedgraph_tag2profile.log
awk '{if($9=="-") {print $0}}'  Fox1_3.1000000lines.tag.uniq.mutation.txt | cut -f 1-6 > Fox1_3.1000000lines.tag.uniq.del.bed
perl /home/galaxy/tools/CTK/CIMS.pl -n 10 -p -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.del.bed Fox1_3.1000000lines.tag.uniq.del.CIMS.txt >& cims.log
perl /home/galaxy/tools/CTK/removeRow.pl -q 3 -f 3 -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.del.bed > Fox1_3.1000000lines.tag.uniq.clean.bed
perl /home/galaxy/tools/CTK/bedExt.pl -n up -l -1 -r -1 -v Fox1_3.1000000lines.tag.uniq.clean.bed Fox1_3.1000000lines.tag.uniq.clean.trunc.bed >& bedExt.log
perl /home/galaxy/tools/CTK/tag2cluster.pl -big -s -maxgap "-1" -of bed -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.cluster.0.bed >& tag2cluster.log
awk '{if($5>2) {print $0}}' Fox1_3.1000000lines.tag.uniq.cluster.0.bed > Fox1_3.1000000lines.tag.uniq.cluster.bed
perl /home/galaxy/tools/CTK/tag2peak.pl -big -ss -v --prefix "CITS" -gap 25 -p 0.001 -gene Fox1_3.1000000lines.tag.uniq.cluster.bed Fox1_3.1000000lines.tag.uniq.clean.trunc.bed Fox1_3.1000000lines.tag.uniq.clean.CITS.s30.bed >& CITS.log

check
-----
cd /home/yq2139/mnt_prj/CTK_testing/filtering
diff <(cat /home/galaxy/galaxy_test/ctk/testing_std/Fox1_1.1000000lines.filtered.fastq) <(head  -982696 Fox1_1.fastq)|head
diff <(cat /home/galaxy/galaxy_test/ctk/testing_std/Fox1_1.1000000lines.trim.fastq) <(head  -895544 Fox1_1.trim.fastq)|head