diff notes.txt @ 0:0475e4175855 draft default tip

planemo upload commit 81ece2551cea27cbd0e718ef5b7a2fe8d4abd071-dirty
author yqiancolumbia
date Mon, 30 Apr 2018 05:25:11 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/notes.txt	Mon Apr 30 05:25:11 2018 -0400
@@ -0,0 +1,117 @@
+https://galaxyproject.org/admin/data-integration/
+
+su -l galaxy
+
+prerequisites
+-------------
+cd tools
+ln -s /home/yq2139/czlab_src/CTK/ CTK
+ln -s /home/yq2139/czlab_src/ngs/ ngs
+ln -s /home/yq2139/czlab_src/plib/ plib
+rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/tools/fastx_toolkit_0.0.14/ .
+#rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/tools/bwa-0.7.12/ .
+
+# IMPORTANT: add /home/galaxy/tools/fastx_toolkit_0.0.14/bin to PATH in ~/.bash_profile
+# do the same for samtools, which is installed later
+
+cd /home/galaxy/galaxy_data
+rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/genomes/mm10/bwa/* genomes/mm10/bwa/
+rsync -avzP yq2139@intron.c2b2.columbia.edu:/ifs/data/c2b2/cz_lab/genomes/hg19/bwa/* genomes/hg19/bwa/
+rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/mm10/annotation/rmsk.bed /home/galaxy/galaxy_data/genomes/mm10/annotation/
+rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/mm10/annotation/rmsk.RNA.bed /home/galaxy/galaxy_data/genomes/mm10/annotation/
+rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/hg19/annotation/rmsk.RNA.bed /home/galaxy/galaxy_data/genomes/hg19/annotation/
+rsync -avzP yq2139@intron.c2b2.columbia.edu://ifs/data/c2b2/cz_lab/genomes/hg19/annotation/rmsk.bed /home/galaxy/galaxy_data/genomes/hg19/annotation/
+
+mkdir /home/galaxy/galaxy_data/cache
+# to temporarily store cache for fastq2collapse.pl etc.
+
+install bwa + bam2sam
+---------------------
+# ref https://biostar.usegalaxy.org/p/24896/#24919 
+# steps: http://intron.c2b2.columbia.edu:8888/ - admin - search tool shed - galaxy main tool shed - bwa - preview and install
+# when uninstalling tools, one needs to remove the corresponding directories and run “reset metadata” in the admin section
+
+# modify tool-data/bwa_mem_index.loc : 
+	hg19bwa hg19    hg19  /home/galaxy/galaxy_data/genomes/hg19/bwa/hg19.fa
+	mm10bwa mm10    mm10  /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa
+
+# modify integrated_tool_panel.xml to change the order in which the tools are displayed
+
+# add these below (taken from galaxy/config/shed_tool_conf.xml) to config/tool_conf.xml :
+
+    <tool file="../shed_tools/toolshed.g2.bx.psu.edu/repos/devteam/bwa/4d82cf59895e/bwa/bwa.xml"
+          guid="toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.16.2">
+      <tool_shed>toolshed.g2.bx.psu.edu</tool_shed>
+        <repository_name>bwa</repository_name>
+        <repository_owner>devteam</repository_owner>
+        <installed_changeset_revision>4d82cf59895e</installed_changeset_revision>
+        <id>toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.16.2</id>
+        <version>0.7.16.2</version>
+    </tool>
+
+    <tool file="../shed_tools/toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/af7c50162f0b/bam_to_sam/bam_to_sam.xml" guid="toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/bam_to_sam/2.0">
+      <tool_shed>toolshed.g2.bx.psu.edu</tool_shed>
+        <repository_name>bam_to_sam</repository_name>
+        <repository_owner>devteam</repository_owner>
+        <installed_changeset_revision>af7c50162f0b</installed_changeset_revision>
+        <id>toolshed.g2.bx.psu.edu/repos/devteam/bam_to_sam/bam_to_sam/2.0</id>
+        <version>2.0</version>
+    </tool>
+
+
+problems fixed
+---------------
+# (done) redirect stderr to stdout (my $msgio = $outBedFile eq '-' ? *STDERR :  *STDOUT;) : fastq_filter.pl stripBarcode.pl parseAlignment.pl fastq2collapse.pl  tag2peak.pl tag2profile.pl CIMS.pl tag2cluster.pl
+# (not needed if the environment variables are set correctly) add a new data type "sai"; ref https://biostar.usegalaxy.org/p/24983/ and https://galaxyproject.org/admin/datatypes/adding-datatypes/
+	step1 add the line below to the datatypes section in ./config/datatypes_conf.xml.sample file:
+		<datatype extension="sai" type="galaxy.datatypes.binary:Sai" subclass="True"/>
+	step2 In binary.py file, add:
+		class Sai( Binary ):
+			"""Class describing a Sai file"""
+			file_ext = "sai"
+	step3 In registry.py file, add:
+		'sai' : binary.Sai(),
+# (done) bed2annotation.pl problem: "cat: write error: Broken pipe" 
+# (done) bedExt.pl problem: /data/galaxy/database/files/000/dataset_241.dat already exists
+
+to upload file >2GB 
+--------------------
+# upload the file using rsync or scp to the c2b2 server, under /ifs/scratch/c2b2/cz_lab/web_data/galaxy_tmp
+# Then specify the link to the file in galaxy following the example below:
+# https://zhanglab.c2b2.columbia.edu/data/galaxy_tmp/HepG2.RBFOX2.rep1.R2.fastq.gz 
+
+
+test
+----
+cd /home/galaxy/galaxy_test/ctk
+rsync -avzP yq2139@intron.c2b2.columbia.edu://mnt/chromatin/archive_proj/cz2294/CLIP_comparison/BrainRbfox/fastq/Fox1_1.fastq.gz .
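+# Fox1_1.fastq.gz is std; Fox1_3.fastq.gz is brdu. The commands below use Fox1_3.fastq.gz, which the rsync above does not fetch; copy it the same way if it is not already present.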
+
+gzip -cd Fox1_3.fastq.gz |head -1000000 > Fox1_3.1000000lines.raw.fastq
+perl /home/galaxy/tools/CTK/fastq_filter.pl -v -if sanger -f mean:0-38:20 -maxN -1 -of fastq Fox1_3.1000000lines.raw.fastq Fox1_3.1000000lines.filtered.fastq >& fastqfilter.log
+fastx_clipper -a TCGTATGCCGTCTTCTGCTTG  -l 29 -n -v -i Fox1_3.1000000lines.filtered.fastq -o Fox1_3.1000000lines.trim1.fastq >& fastx_clipper.log
+fastq_quality_trimmer -i Fox1_3.1000000lines.trim1.fastq -v -t 5 -l 29 -o Fox1_3.1000000lines.trim2.fastq >& fastq_quality_trimmer.log
+perl /home/galaxy/tools/CTK/fastq2collapse.pl Fox1_3.1000000lines.trim2.fastq -v Fox1_3.1000000lines.trim2.c.fastq >& fastq2collapse.log
+perl /home/galaxy/tools/CTK/stripBarcode.pl -v -format fastq -len 14 Fox1_3.1000000lines.trim2.c.fastq Fox1_3.1000000lines.trim2.c.tag.fastq >& stripBarcode.log
+/home/galaxy/tools/bwa-0.7.12/bwa aln -t 4 -n 0.06 -q 20 /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa Fox1_3.1000000lines.trim2.c.tag.fastq > Fox1_3.1000000lines.sai 
+# do not add a log redirect (>& file) to bwa aln, or else the sai file ends up empty
+/home/galaxy/tools/bwa-0.7.12/bwa samse  /home/galaxy/galaxy_data/genomes/mm10/bwa/mm10.fa Fox1_3.1000000lines.sai Fox1_3.1000000lines.trim2.c.tag.fastq > Fox1_3.1000000lines.sam
+perl /home/galaxy/tools/CTK/parseAlignment.pl -v --map-qual 1 --min-len 18 --mutation-file Fox1_3.1000000lines.mutation.txt Fox1_3.1000000lines.sam Fox1_3.1000000lines.tag.bed >& parseAlignment.log
+perl /home/galaxy/tools/CTK/tag2collapse.pl -v -big --random-barcode -EM 30 --seq-error-model alignment -weight --weight-in-name --keep-max-score --keep-tag-name Fox1_3.1000000lines.tag.bed Fox1_3.1000000lines.tag.uniq.bed >& tag2collapse.log
+perl /home/galaxy/tools/CTK/selectRow.pl -q 3 -f 3 Fox1_3.1000000lines.mutation.txt  Fox1_3.1000000lines.tag.uniq.bed   > Fox1_3.1000000lines.tag.uniq.mutation.txt
+perl /home/galaxy/tools/CTK/bed2annotation.pl -conf /home/galaxy/tools/CTK/annotation.loc -dbkey mm10 -ss -big -region  -v Fox1_3.1000000lines.tag.uniq.bed  Fox1_3.1000000lines.tag.uniq.annot.txt >& annot.log
+perl /home/galaxy/tools/CTK/tag2peak.pl -big -ss -v --valley-seeking -p 0.05 --valley-depth 0.9 --dbkey mm10 --multi-test Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.pool.tag.uniq.peak.sig.bed >& tag2peak.log
+perl /home/galaxy/tools/CTK/tag2profile.pl -v -ss -exact -of bedgraph  Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.bedgraph >& bedgraph_tag2profile.log
+awk '{if($9=="-") {print $0}}'  Fox1_3.1000000lines.tag.uniq.mutation.txt | cut -f 1-6 > Fox1_3.1000000lines.tag.uniq.del.bed
+perl /home/galaxy/tools/CTK/CIMS.pl -n 10 -p -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.del.bed Fox1_3.1000000lines.tag.uniq.del.CIMS.txt >& cims.log
+perl /home/galaxy/tools/CTK/removeRow.pl -q 3 -f 3 -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.del.bed > Fox1_3.1000000lines.tag.uniq.clean.bed
+perl /home/galaxy/tools/CTK/bedExt.pl -n up -l -1 -r -1 -v Fox1_3.1000000lines.tag.uniq.clean.bed Fox1_3.1000000lines.tag.uniq.clean.trunc.bed >& bedExt.log
+perl /home/galaxy/tools/CTK/tag2cluster.pl -big -s -maxgap "-1" -of bed -v Fox1_3.1000000lines.tag.uniq.bed Fox1_3.1000000lines.tag.uniq.cluster.0.bed >& tag2cluster.log
+awk '{if($5>2) {print $0}}' Fox1_3.1000000lines.tag.uniq.cluster.0.bed > Fox1_3.1000000lines.tag.uniq.cluster.bed
+perl /home/galaxy/tools/CTK/tag2peak.pl -big -ss -v --prefix "CITS" -gap 25 -p 0.001 -gene Fox1_3.1000000lines.tag.uniq.cluster.bed Fox1_3.1000000lines.tag.uniq.clean.trunc.bed Fox1_3.1000000lines.tag.uniq.clean.CITS.s30.bed >& CITS.log
+
+check
+-----
+cd /home/yq2139/mnt_prj/CTK_testing/filtering
+diff <(cat /home/galaxy/galaxy_test/ctk/testing_std/Fox1_1.1000000lines.filtered.fastq) <(head  -982696 Fox1_1.fastq)|head
+diff <(cat /home/galaxy/galaxy_test/ctk/testing_std/Fox1_1.1000000lines.trim.fastq) <(head  -895544 Fox1_1.trim.fastq)|head