annotate commandline_sample_STR-FM_estimate_mininum_informative_Read_Depth @ 6:d75894f5d61b draft

Uploaded
author arkarachai-fungtammasan
date Sat, 22 Aug 2015 12:13:34 -0400
parents d5ed5c2e25c3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
## This is a sample PBS script that runs a chain of STR-FM tools: it generates
## STR length profiles, genotypes them, keeps heterozygous calls that differ by
## one full motif, scores them and combines the resulting probabilities.
## NOTE(review): the original header described this as "profiling STR from
## reference genome"; the steps below do not read a genome -- confirm wording.
##
## Requirement
##   1 STR error rates (can be downloaded from https://usegalaxy.org/u/guru%40psu.edu/h/error-rates-files) --> errorrate.bymajorallele
##   2 profilegenerator.py, GenotypeTRcorrection.py, heteroprob.py and
##     combinedprobforallelecombination.py present in the working directory
##     (they are invoked by relative name after the cd below).
##
## Environment variables (for example supplied through `qsub -v`):
##   MOTIF       required; passed as the second argument of profilegenerator.py
##   OUTPUT      required; prefix of every file written by this job
##   INPUT       required; passed as the second argument of heteroprob.py
##               NOTE(review): the original script never described INPUT --
##               confirm the expected file against probvalueforhetero.xml.
##   MOTIF_SIZE  optional; allele length difference kept by the screening step
##               (defaults to 4, the value hard-coded in the original script).

# Stop at the first failing step or unset variable so that a broken stage
# never feeds an empty or stale file to the next one.
set -eu

: "${MOTIF:?MOTIF must be set}"
: "${OUTPUT:?OUTPUT must be set}"
: "${INPUT:?INPUT must be set}"

motif_size="${MOTIF_SIZE:-4}"
error_rates="errorrate.bymajorallele"
# Every intermediate file shares this prefix; 30 is also the third argument
# given to profilegenerator.py.
profile_prefix="${OUTPUT}.30"

echo " "
echo " "
echo "Job started on $(hostname) at $(date)"

# Without this check a failed cd would make the job write into whatever
# directory it happened to start in.
cd /working/directory/ || {
  echo "cannot cd to /working/directory/" >&2
  exit 1
}

printf '%s\n' "${MOTIF}"
printf '%s\n' "${OUTPUT}"
echo " "

echo "Generate all possible combination of STR length profile" ## See detail in profilegenerator.xml on https://github.com/Arkarachai/STR-FM
python profilegenerator.py "${error_rates}" "${MOTIF}" 30 > "${profile_prefix}"

echo "remove duplicated profiles"
sort -u "${profile_prefix}" > "${profile_prefix}.sort"

echo "genotyping using error correction model" ## See detail in GenotypingSTR.xml on https://github.com/Arkarachai/STR-FM
python GenotypeTRcorrection.py "${profile_prefix}.sort" "${error_rates}" "${profile_prefix}.prob" 0.5

echo "select only full motif different --> need to replace 4 with motif size (1-6)"
# Keep lines containing "hetero" whose 7th and 8th fields differ by exactly
# motif_size in either direction. Set MOTIF_SIZE instead of editing the 4.
awk -v size="${motif_size}" \
  '/hetero/ && ((($7 - $8) == size) || (($8 - $7) == size)) { print $0 }' \
  "${profile_prefix}.prob" > "${profile_prefix}.prob.screen"

echo "Evaluate the probability of the allele combination to generate read profile" ## See detail in probvalueforhetero.xml on https://github.com/Arkarachai/STR-FM
python heteroprob.py "${profile_prefix}.prob.screen" "${INPUT}" > "${profile_prefix}.bino"

echo "formatting"
# Numeric sort on field 12, ties broken numerically on field 6.
sort -k 12n,12 -k 6n,6 "${profile_prefix}.bino" > "${profile_prefix}.bino.sort"

echo "Combine read profile probabilities" ## See detail in combineprobforallelecombination.xml on https://github.com/Arkarachai/STR-FM
python combinedprobforallelecombination.py "${profile_prefix}.bino.sort" > "${profile_prefix}.bino.sort.plot"


echo "Job end on $(hostname) at $(date)"