Mercurial > repos > yqiancolumbia > ctk_test
comparison notes.txt @ 0:0475e4175855 draft default tip
planemo upload commit 81ece2551cea27cbd0e718ef5b7a2fe8d4abd071-dirty
author | yqiancolumbia |
---|---|
date | Mon, 30 Apr 2018 05:25:11 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0475e4175855 |
---|---|
1 https://galaxyproject.org/admin/data-integration/ | |
2 | |
3 su -l galaxy | |
4 | |
5 prerequisites | |
6 ------------- | |
7 cd tools | |
8 ln -s /home/yq2139/czlab_src/CTK/ CTK | |
9 ln -s /home/yq2139/czlab_src/ngs/ ngs | |
10 ln -s /home/yq2139/czlab_src/plib/ plib | |
11 rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/tools/fastx_toolkit_0.0.14/ . | |
12 #rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/tools/bwa-0.7.12/ . | |
13 | |
14 # IMPORTANT add /home/galaxy/tools/fastx_toolkit_0.0.14/bin to PATH in ~/.bash_profile | |
15 # same for samtools installed later | |
16 | |
17 cd /home/galaxy/galaxy_data | |
18 rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/genomes/mm10/bwa/* genomes/mm10/bwa/ | |
19 rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/genomes/hg19/bwa/* genomes/hg19/bwa/ | |
20 rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/mm10/annotation/rmsk.bed /home/galaxy/galaxy_data/genomes/mm10/annotation/ | |
21 rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/mm10/annotation/rmsk.RNA.bed /home/galaxy/galaxy_data/genomes/mm10/annotation/ | |
22 rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/hg19/annotation/rmsk.RNA.bed /home/galaxy/galaxy_data/genomes/hg19/annotation/ | |
23 rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/hg19/annotation/rmsk.bed /home/galaxy/galaxy_data/genomes/hg19/annotation/ | |
24 | |
25 mkdir /home/galaxy/galaxy_data/cache | |
26 # to temporarily store cache for fastq2collapse.pl etc. | |
27 | |
28 install bwa + bam2sam | |
29 --------------------- | |
30 # ref https://biostar.usegalaxy.org/p/24896/#24919 | |
31 # steps: http://intron.c2b2.columbia.edu:8888/ - admin - search tool shed - galaxy main tool shed - bwa - preview and install | |
32 # when uninstalling tools, one needs to remove the corresponding directories and “reset metadata” in the admin section | |
33 | |
34 # modify tool-data/bwa_mem_index.loc : | |
35 hg19bwa hg19 hg19 /home/galaxy/galaxy_data/genomes/hg19/bwa/hg19.fa | |
36 mm10bwa mm10 mm10 /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa | |
37 | |
38 # modify integrated_tool_panel.xml to change showing order | |
39 | |
40 # add these below (taken from galaxy/config/shed_tool_conf.xml) to config/tool_conf.xml : | |
41 | |
42 <tool file="../shed_tools/toolshed.g2.bx.psu.edu/repos/devteam/bwa/4d82cf59895e/bwa/bwa.xml" | |
43 guid="toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.16.2"> | |
44 <tool_shed>toolshed.g2.bx.psu.edu</tool_shed> | |
45 <repository_name>bwa</repository_name> | |
46 <repository_owner>devteam</repository_owner> | |
47 <installed_changeset_revision>4d82cf59895e</installed_changeset_revision> | |
48 <id>toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.16.2</id> | |
49 <version>0.7.16.2</version> | |
50 </tool> | |
51 | |
52 <tool file="../shed_tools/toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/af7c50162f0b/bam_to_sam/bam_to_sam.xml" guid="toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/bam_to_sam/2.0"> | |
53 <tool_shed>toolshed.g2.bx.psu.edu</tool_shed> | |
54 <repository_name>bam_to_sam</repository_name> | |
55 <repository_owner>devteam</repository_owner> | |
56 <installed_changeset_revision>af7c50162f0b</installed_changeset_revision> | |
57 <id>toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/bam_to_sam/2.0</id> | |
58 <version>2.0</version> | |
59 </tool> | |
60 | |
61 | |
62 problems fixed | |
63 --------------- | |
64 # (done) redirect stderr to stdout (my $msgio = $outBedFile eq '-' ? *STDERR : *STDOUT;) : fastq_filter.pl stripBarcode.pl parseAlignment.pl fastq2collapse.pl tag2peak.pl tag2profile.pl CIMS.pl tag2cluster.pl | |
65 # (no need if correctly added environment variables) add a new data type "sai"; ref https://biostar.usegalaxy.org/p/24983/ and https://galaxyproject.org/admin/datatypes/adding-datatypes/ | |
66 step1 add the line below to the datatypes section of the ./config/datatypes_conf.xml.sample file: | |
67 <datatype extension="sai" type="galaxy.datatypes.binary:Sai" subclass="True"/> | |
68 step2 In binary.py file, add: | |
69 class Sai( Binary ): | |
70 """Class describing a Sai file""" | |
71 file_ext = "sai" | |
72 In registry.py file, add: | |
73 'sai' : binary.Sai(), | |
74 # (done) bed2annotation.pl problem: "cat: write error: Broken pipe" | |
75 # (done) bedExt.pl problem: /data/galaxy/database/files/000/dataset_241.dat already exists | |
76 | |
77 to upload file >2GB | |
78 -------------------- | |
79 # upload the file using rsync or scp to the c2b2 server at /ifs/scratch/c2b2/cz_lab/web_data/galaxy_tmp | |
80 # Then specify the link to the file in galaxy following the example below: | |
81 # https://zhanglab.c2b2.columbia.edu/data/galaxy_tmp/HepG2.RBFOX2.rep1.R2.fastq.gz | |
82 | |
83 | |
84 test | |
85 ---- | |
86 cd /home/galaxy/galaxy_test/ctk | |
87 rsync -avzP yq2139@intron.c2b2.columbia.edu://mnt/chromatin/archive_proj/cz2294/CLIP_comparison/BrainRbfox/fastq/Fox1_1.fastq.gz . | |
88 #Fox1_1.fastq.gz is std; Fox1_3.fastq.gz is brdu | |
89 | |
90 gzip -cd Fox1_3.fastq.gz |head -1000000 > Fox1_3.1000000lines.raw.fastq | |
91 perl /home/galaxy/tools/CTK/fastq_filter.pl -v -if sanger -f mean:0-38:20 -maxN -1 -of fastq Fox1_3.1000000lines.raw.fastq Fox1_3.1000000lines.filtered.fastq >& fastqfilter.log | |
92 fastx_clipper -a TCGTATGCCGTCTTCTGCTTG -l 29 -n -v -i Fox1_3.1000000lines.filtered.fastq -o Fox1_3.1000000lines.trim1.fastq >& fastx_clipper.log | |
93 fastq_quality_trimmer -i Fox1_3.1000000lines.trim1.fastq -v -t 5 -l 29 -o Fox1_3.1000000lines.trim2.fastq >& fastq_quality_trimmer.log | |
94 perl /home/galaxy/tools/CTK/fastq2collapse.pl Fox1_3.1000000lines.trim2.fastq -v Fox1_3.1000000lines.trim2.c.fastq >& fastq2collapse.log | |
95 perl /home/galaxy/tools/CTK/stripBarcode.pl -v -format fastq -len 14 Fox1_3.1000000lines.trim2.c.fastq Fox1_3.1000000lines.trim2.c.tag.fastq >& stripBarcode.log | |
96 /home/galaxy/tools/bwa-0.7.12/bwa aln -t 4 -n 0.06 -q 20 /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa Fox1_3.1000000lines.trim2.c.tag.fastq > Fox1_3.1000000lines.sai | |
97 # can't redirect bwa aln output to a log (as with >& above), or the sai file ends up empty | |
98 /home/galaxy/tools/bwa-0.7.12/bwa samse /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa Fox1_3.1000000lines.sai Fox1_3.1000000lines.trim2.c.tag.fastq > Fox1_3.1000000lines.sam | |
99 perl /home/galaxy/tools/CTK/parseAlignment.pl -v --map-qual 1 --min-len 18 --mutation-file Fox1_3.1000000lines.mutation.txt Fox1_3.1000000lines.sam Fox1_3.1000000lines.tag.bed >& parseAlignment.log | |
100 perl /home/galaxy/tools/CTK/tag2collapse.pl -v -big --random-barcode -EM 30 --seq-error-model alignment -weight --weight-in-name --keep-max-score --keep-tag-name Fox1_3.1000000lines.tag.bed Fox1_3.1000000lines.tag.uniq.bed >& tag2collapse.log | |
101 perl /home/galaxy/tools/CTK/selectRow.pl -q 3 -f 3 Fox1_3.1000000lines.mutation.txt Fox1_3.1000000lines.tag.uniq.bed > Fox1_3.1000000lines.tag.uniq.mutation.txt | |
102 perl /home/galaxy/tools/CTK/bed2annotation.pl -conf /home/galaxy/tools/CTK/annotation.loc -dbkey mm10 -ss -big -region -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.annot.txt >& annot.log | |
103 perl /home/galaxy/tools/CTK/tag2peak.pl -big -ss -v --valley-seeking -p 0.05 --valley-depth 0.9 --dbkey mm10 --multi-test Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.pool.tag.uniq.peak.sig.bed >& tag2peak.log | |
104 perl /home/galaxy/tools/CTK/tag2profile.pl -v -ss -exact -of bedgraph Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.bedgraph >& bedgraph_tag2profile.log | |
105 awk '{if($9=="-") {print $0}}' Fox1_3.1000000lines.tag.uniq.mutation.txt | cut -f 1-6 > Fox1_3.1000000lines.tag.uniq.del.bed | |
106 perl /home/galaxy/tools/CTK/CIMS.pl -n 10 -p -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.del.bed Fox1_3.1000000lines.tag.uniq.del.CIMS.txt >& cims.log | |
107 perl /home/galaxy/tools/CTK/removeRow.pl -q 3 -f 3 -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.del.bed > Fox1_3.1000000lines.tag.uniq.clean.bed | |
108 perl /home/galaxy/tools/CTK/bedExt.pl -n up -l -1 -r -1 -v Fox1_3.1000000lines.tag.uniq.clean.bed Fox1_3.1000000lines.tag.uniq.clean.trunc.bed >& bedExt.log | |
109 perl /home/galaxy/tools/CTK/tag2cluster.pl -big -s -maxgap "-1" -of bed -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.cluster.0.bed >& tag2cluster.log | |
110 awk '{if($5>2) {print $0}}' Fox1_3.1000000lines.tag.uniq.cluster.0.bed > Fox1_3.1000000lines.tag.uniq.cluster.bed | |
111 perl /home/galaxy/tools/CTK/tag2peak.pl -big -ss -v --prefix "CITS" -gap 25 -p 0.001 -gene Fox1_3.1000000lines.tag.uniq.cluster.bed Fox1_3.1000000lines.tag.uniq.clean.trunc.bed Fox1_3.1000000lines.tag.uniq.clean.CITS.s30.bed >& CITS.log | |
112 | |
113 check | |
114 ----- | |
115 cd /home/yq2139/mnt_prj/CTK_testing/filtering | |
116 diff <(cat /home/galaxy/galaxy_test/ctk/testing_std/Fox1_1.1000000lines.filtered.fastq) <(head -982696 Fox1_1.fastq)|head | |
117 diff <(cat /home/galaxy/galaxy_test/ctk/testing_std/Fox1_1.1000000lines.trim.fastq) <(head -895544 Fox1_1.trim.fastq)|head |