comparison notes.txt @ 0:0475e4175855 draft default tip

planemo upload commit 81ece2551cea27cbd0e718ef5b7a2fe8d4abd071-dirty
author yqiancolumbia
date Mon, 30 Apr 2018 05:25:11 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0475e4175855
1 https://galaxyproject.org/admin/data-integration/
2
3 su -l galaxy
4
5 prerequisites
6 -------------
7 cd tools
8 ln -s /home/yq2139/czlab_src/CTK/ CTK
9 ln -s /home/yq2139/czlab_src/ngs/ ngs
10 ln -s /home/yq2139/czlab_src/plib/ plib
11 rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/tools/fastx_toolkit_0.0.14/ .
12 #rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/tools/bwa-0.7.12/ .
13
14 # IMPORTANT add /home/galaxy/tools/fastx_toolkit_0.0.14/bin to PATH in ~/.bash_profile
15 # same for samtools installed later
16
17 cd /home/galaxy/galaxy_data
18 rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/genomes/mm10/bwa/* genomes/mm10/bwa/
19 rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/genomes/hg19/bwa/* genomes/hg19/bwa/
20 rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/mm10/annotation/rmsk.bed /home/galaxy/galaxy_data/genomes/mm10/annotation/
21 rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/mm10/annotation/rmsk.RNA.bed /home/galaxy/galaxy_data/genomes/mm10/annotation/
22 rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/hg19/annotation/rmsk.RNA.bed /home/galaxy/galaxy_data/genomes/hg19/annotation/
23 rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/hg19/annotation/rmsk.bed /home/galaxy/galaxy_data/genomes/hg19/annotation/
24
25 mkdir /home/galaxy/galaxy_data/cache
26 # to temporarily store cache for fastq2collapse.pl etc.
27
28 install bwa + bam2sam
29 ---------------------
30 # ref https://biostar.usegalaxy.org/p/24896/#24919
31 # steps: http://intron.c2b2.columbia.edu:8888/ - admin - search tool shed - galaxy main tool shed - bwa - preview and install
32 # when uninstalling tools, one needs to remove the corresponding directories and “reset metadata” in the admin section
33
34 # modify tool-data/bwa_mem_index.loc :
35 hg19bwa hg19 hg19 /home/galaxy/galaxy_data/genomes/hg19/bwa/hg19.fa
36 mm10bwa mm10 mm10 /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa
37
38 # modify integrated_tool_panel.xml to change the order in which tools are shown
39
40 # add these below (taken from galaxy/config/shed_tool_conf.xml) to config/tool_conf.xml :
41
42 <tool file="../shed_tools/toolshed.g2.bx.psu.edu/repos/devteam/bwa/4d82cf59895e/bwa/bwa.xml"
43 guid="toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.16.2">
44 <tool_shed>toolshed.g2.bx.psu.edu</tool_shed>
45 <repository_name>bwa</repository_name>
46 <repository_owner>devteam</repository_owner>
47 <installed_changeset_revision>4d82cf59895e</installed_changeset_revision>
48 <id>toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.16.2</id>
49 <version>0.7.16.2</version>
50 </tool>
51
52 <tool file="../shed_tools/toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/af7c50162f0b/bam_to_sam/bam_to_sam.xml" guid="toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/bam_to_sam/2.0">
53 <tool_shed>toolshed.g2.bx.psu.edu</tool_shed>
54 <repository_name>bam_to_sam</repository_name>
55 <repository_owner>devteam</repository_owner>
56 <installed_changeset_revision>af7c50162f0b</installed_changeset_revision>
57 <id>toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/bam_to_sam/2.0</id>
58 <version>2.0</version>
59 </tool>
60
61
62 problems fixed
63 ---------------
64 # (done) redirect stderr to stdout (my $msgio = $outBedFile eq '-' ? *STDERR : *STDOUT;) : fastq_filter.pl stripBarcode.pl parseAlignment.pl fastq2collapse.pl tag2peak.pl tag2profile.pl CIMS.pl tag2cluster.pl
65 # (not needed if the environment variables are set correctly) add a new data type "sai"; ref https://biostar.usegalaxy.org/p/24983/ and https://galaxyproject.org/admin/datatypes/adding-datatypes/
66 step1 add the line below to the datatypes section in ./config/datatypes_conf.xml.sample file:
67 <datatype extension="sai" type="galaxy.datatypes.binary:Sai" subclass="True"/>
68 step2 In binary.py file, add:
69 class Sai( Binary ):
70 """Class describing a Sai file"""
71 file_ext = "sai"
72 In registry.py file, add:
73 'sai' : binary.Sai(),
74 # (done) bed2annotation.pl problem: "cat: write error: Broken pipe"
75 # (done) bedExt.pl problem: /data/galaxy/database/files/000/dataset_241.dat already exists
76
77 to upload file >2GB
78 --------------------
79 # upload the file using rsync or scp to the c2b2 server directory /ifs/scratch/c2b2/cz_lab/web_data/galaxy_tmp
80 # Then specify the link to the file in galaxy following the example below:
81 # https://zhanglab.c2b2.columbia.edu/data/galaxy_tmp/HepG2.RBFOX2.rep1.R2.fastq.gz
82
83
84 test
85 ----
86 cd /home/galaxy/galaxy_test/ctk
87 rsync -avzP yq2139@intron.c2b2.columbia.edu://mnt/chromatin/archive_proj/cz2294/CLIP_comparison/BrainRbfox/fastq/Fox1_1.fastq.gz .
88 #Fox1_1.fastq.gz is std; Fox1_3.fastq.gz is brdu
89
90 gzip -cd Fox1_3.fastq.gz |head -1000000 > Fox1_3.1000000lines.raw.fastq
91 perl /home/galaxy/tools/CTK/fastq_filter.pl -v -if sanger -f mean:0-38:20 -maxN -1 -of fastq Fox1_3.1000000lines.raw.fastq Fox1_3.1000000lines.filtered.fastq >& fastqfilter.log
92 fastx_clipper -a TCGTATGCCGTCTTCTGCTTG -l 29 -n -v -i Fox1_3.1000000lines.filtered.fastq -o Fox1_3.1000000lines.trim1.fastq >& fastx_clipper.log
93 fastq_quality_trimmer -i Fox1_3.1000000lines.trim1.fastq -v -t 5 -l 29 -o Fox1_3.1000000lines.trim2.fastq >& fastq_quality_trimmer.log
94 perl /home/galaxy/tools/CTK/fastq2collapse.pl Fox1_3.1000000lines.trim2.fastq -v Fox1_3.1000000lines.trim2.c.fastq >& fastq2collapse.log
95 perl /home/galaxy/tools/CTK/stripBarcode.pl -v -format fastq -len 14 Fox1_3.1000000lines.trim2.c.fastq Fox1_3.1000000lines.trim2.c.tag.fastq >& stripBarcode.log
96 /home/galaxy/tools/bwa-0.7.12/bwa aln -t 4 -n 0.06 -q 20 /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa Fox1_3.1000000lines.trim2.c.tag.fastq > Fox1_3.1000000lines.sai
97 # can't capture a log for bwa aln (as done with >& for the other steps), or else the sai file is empty
98 /home/galaxy/tools/bwa-0.7.12/bwa samse /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa Fox1_3.1000000lines.sai Fox1_3.1000000lines.trim2.c.tag.fastq > Fox1_3.1000000lines.sam
99 perl /home/galaxy/tools/CTK/parseAlignment.pl -v --map-qual 1 --min-len 18 --mutation-file Fox1_3.1000000lines.mutation.txt Fox1_3.1000000lines.sam Fox1_3.1000000lines.tag.bed >& parseAlignment.log
100 perl /home/galaxy/tools/CTK/tag2collapse.pl -v -big --random-barcode -EM 30 --seq-error-model alignment -weight --weight-in-name --keep-max-score --keep-tag-name Fox1_3.1000000lines.tag.bed Fox1_3.1000000lines.tag.uniq.bed >& tag2collapse.log
101 perl /home/galaxy/tools/CTK/selectRow.pl -q 3 -f 3 Fox1_3.1000000lines.mutation.txt Fox1_3.1000000lines.tag.uniq.bed > Fox1_3.1000000lines.tag.uniq.mutation.txt
102 perl /home/galaxy/tools/CTK/bed2annotation.pl -conf /home/galaxy/tools/CTK/annotation.loc -dbkey mm10 -ss -big -region -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.annot.txt >& annot.log
103 perl /home/galaxy/tools/CTK/tag2peak.pl -big -ss -v --valley-seeking -p 0.05 --valley-depth 0.9 --dbkey mm10 --multi-test Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.pool.tag.uniq.peak.sig.bed >& tag2peak.log
104 perl /home/galaxy/tools/CTK/tag2profile.pl -v -ss -exact -of bedgraph Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.bedgraph >& bedgraph_tag2profile.log
105 awk '{if($9=="-") {print $0}}' Fox1_3.1000000lines.tag.uniq.mutation.txt | cut -f 1-6 > Fox1_3.1000000lines.tag.uniq.del.bed
106 perl /home/galaxy/tools/CTK/CIMS.pl -n 10 -p -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.del.bed Fox1_3.1000000lines.tag.uniq.del.CIMS.txt >& cims.log
107 perl /home/galaxy/tools/CTK/removeRow.pl -q 3 -f 3 -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.del.bed > Fox1_3.1000000lines.tag.uniq.clean.bed
108 perl /home/galaxy/tools/CTK/bedExt.pl -n up -l -1 -r -1 -v Fox1_3.1000000lines.tag.uniq.clean.bed Fox1_3.1000000lines.tag.uniq.clean.trunc.bed >& bedExt.log
109 perl /home/galaxy/tools/CTK/tag2cluster.pl -big -s -maxgap "-1" -of bed -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.cluster.0.bed >& tag2cluster.log
110 awk '{if($5>2) {print $0}}' Fox1_3.1000000lines.tag.uniq.cluster.0.bed > Fox1_3.1000000lines.tag.uniq.cluster.bed
111 perl /home/galaxy/tools/CTK/tag2peak.pl -big -ss -v --prefix "CITS" -gap 25 -p 0.001 -gene Fox1_3.1000000lines.tag.uniq.cluster.bed Fox1_3.1000000lines.tag.uniq.clean.trunc.bed Fox1_3.1000000lines.tag.uniq.clean.CITS.s30.bed >& CITS.log
112
113 check
114 -----
115 cd /home/yq2139/mnt_prj/CTK_testing/filtering
116 diff <(cat /home/galaxy/galaxy_test/ctk/testing_std/Fox1_1.1000000lines.filtered.fastq) <(head -982696 Fox1_1.fastq)|head
117 diff <(cat /home/galaxy/galaxy_test/ctk/testing_std/Fox1_1.1000000lines.trim.fastq) <(head -895544 Fox1_1.trim.fastq)|head