Mercurial > repos > thondeboer > neat_genreads
annotate test-data/create_test-data @ 0:6e75a84e9338 draft
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author | thondeboer |
---|---|
date | Tue, 15 May 2018 02:39:53 -0400 (2018-05-15) |
parents | |
children |
rev | line source |
---|---|
0
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
1 #!/bin/bash |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
2 source ~/env/bin/activate |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
3 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
4 samtools faidx chrMT.fa |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
5 #TEST1: single read, with everything default |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
6 python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
7 mv out_read1.fq chrMT_read1.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
8 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
9 #TEST2: PE reads, with everything default |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
10 python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
11 mv out_read1.fq chrMT-PE_read1.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
12 mv out_read2.fq chrMT-PE_read2.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
13 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
14 #TEST3: PE reads, with everything default, now with VCF and BAM files |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
15 python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 --vcf --bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
16 mv out_read1.fq chrMT-PE-VCF-BAM_read1.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
17 mv out_read2.fq chrMT-PE-VCF-BAM_read2.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
18 mv out_golden.bam chrMT-PE-VCF-BAM.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
19 mv out_golden.vcf chrMT-PE-VCF-BAM.vcf |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
20 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
21 samtools index chrMT-PE-VCF-BAM.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
22 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
23 #TEST4: PE reads, with VCF and BAM files and VCF file from TEST3 as the seed |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
24 python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 --vcf --bam -v chrMT-PE-VCF-BAM.vcf |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
25 mv out_read1.fq chrMT-PE-VCF-BAM-vcf_read1.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
26 mv out_read2.fq chrMT-PE-VCF-BAM-vcf_read2.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
27 mv out_golden.bam chrMT-PE-VCF-BAM-vcf.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
28 mv out_golden.vcf chrMT-PE-VCF-BAM-vcf.vcf |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
29 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
30 samtools index chrMT-PE-VCF-BAM-vcf.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
31 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
32 #TEST5: PE reads, with VCF and BAM files and BED file as the targeted region |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
33 python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 --vcf --bam -t chrMT-Targets.bed -to 0.02 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
34 mv out_read1.fq chrMT-PE-VCF-BAM-Targeted_read1.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
35 mv out_read2.fq chrMT-PE-VCF-BAM-Targeted_read2.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
36 mv out_golden.bam chrMT-PE-VCF-BAM-Targeted.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
37 mv out_golden.vcf chrMT-PE-VCF-BAM-Targeted.vcf |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
38 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
39 samtools index chrMT-PE-VCF-BAM-Targeted.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
40 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
41 #TEST6: PE reads, with everything default, now with VCF and BAM files GZIPPED |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
42 python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 --vcf --bam --gz |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
43 mv out_read1.fq.gz chrMT-PE-VCF-BAM-gz_read1.fq.gz |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
44 mv out_read2.fq.gz chrMT-PE-VCF-BAM-gz_read2.fq.gz |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
45 mv out_golden.bam chrMT-PE-VCF-BAM-gz.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
46 #Galaxy does not support gzipped VCF file (not sure if this is BGZIPPED) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
47 gunzip out_golden.vcf.gz && mv out_golden.vcf chrMT-PE-VCF-BAM-gz.vcf |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
48 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
49 samtools index chrMT-PE-VCF-BAM-gz.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
50 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
51 #TEST7: PE reads, with all error parameters changed, with VCF and BAM files, not compressed |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
52 python2 ../genReads.py -r chrMT.fa -R 151 -o out --rng 123 --pe 500 50 --vcf --bam -c 20 -E 0.123 -M 0.123 -p 3 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
53 mv out_read1.fq chrMT-PE-VCF-BAM-panic_read1.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
54 mv out_read2.fq chrMT-PE-VCF-BAM-panic_read2.fq |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
55 mv out_golden.bam chrMT-PE-VCF-BAM-panic.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
56 mv out_golden.vcf chrMT-PE-VCF-BAM-panic.vcf |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
57 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
58 samtools index chrMT-PE-VCF-BAM-panic.bam |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
59 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
60 ##### |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
61 # computeGC TESTS |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
62 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
63 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
64 #TEST1: Use BAM from TEST3 to create the model file. Window size = 10 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
65 bedtools genomecov -d -ibam chrMT-PE-VCF-BAM.bam -g chrMT.fa > chrMT-PE-VCF-BAM.genomecov |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
66 python2 ../utilities/computeGC.py -r chrMT.fa -i chrMT-PE-VCF-BAM.genomecov -w 10 -o chrMT-PE-VCF-BAM-computeGC.p |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
67 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
68 ##### |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
69 # computeFraglen |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
70 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
71 samtools view chrMT-PE-VCF-BAM.bam | python2 ../utilities/computeFraglen.py |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
72 mv fraglen.p chrMT-PE-VCF-BAM-fraglen.p |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
73 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
74 ##### |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
75 # genMutModel |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
76 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
77 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
78 #TEST1: Default settings |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
79 python2 ../utilities/genMutModel.py -r chrMT.fa -m chrMT-PE-VCF-BAM.vcf -o chrMT-PE-VCF-BAM-genMutModel.p |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
80 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
81 #TEST2: Defined include list |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
82 python2 ../utilities/genMutModel.py -r chrMT.fa -m chrMT-PE-VCF-BAM.vcf -o chrMT-PE-VCF-BAM-INCLUDELIST-genMutModel.p\ |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
83 -bi chrMT-Targets.bed |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
84 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
85 #TEST3: Defined exclude list |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
86 python2 ../utilities/genMutModel.py -r chrMT.fa -m chrMT-PE-VCF-BAM.vcf -o chrMT-PE-VCF-BAM-EXCLUDELIST-genMutModel.p\ |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
87 -be chrMT-Targets.bed |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
88 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
89 #TEST1: Default settings with all booleans set to YES |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
90 python2 ../utilities/genMutModel.py -r chrMT.fa -m chrMT-PE-VCF-BAM.vcf -o chrMT-PE-VCF-BAM-BOOLEANS-genMutModel.p\ |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
91 --save-trinuc --no-whitelist --skip-common |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
92 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
93 ##### |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
94 # genSeqErrorModel |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
95 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
96 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
97 #TEST1 - 100,000 simulations - Single read |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
98 python2 ../utilities/genSeqErrorModel.py -i chrMT_read1.fq -s 100000 -o chrMT_read1_genSeqErrorModel.p |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
99 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
100 #TEST2 - 100,000 simulations - Paired reads |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
101 python2 ../utilities/genSeqErrorModel.py -i chrMT-PE_read1.fq -i2 chrMT-PE_read2.fq -s 100000 -o chrMT-PE_read1_genSeqErrorModel.p |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
102 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
103 #TEST3 - 100,000 simulations - Only 100 reads |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
104 python2 ../utilities/genSeqErrorModel.py -i chrMT-PE_read1.fq -i2 chrMT-PE_read2.fq -s 100000 -o chrMT-PE-100reads_read1_genSeqErrorModel.p -n 100 |