#!/bin/bash
#
# Merge several IMGT archives into one archive.
#
# Usage: <script> OUTPUT ARCHIVE_1 ID_1 [ARCHIVE_2 ID_2 ...]
#   OUTPUT     path of the merged archive to create
#   ARCHIVE_n  IMGT archive to merge
#   ID_n       suffix appended to the second column of the rows taken from
#              ARCHIVE_n

# Directory that contains this script.
dir="$(cd "$(dirname "$0")" && pwd)"

# The first argument is the output path; everything after it is the input list.
args=("$@")
output=$1
inputs=("${args[@]:1}")

# Scratch directories are created under the directory the script is run from.
workdir="$PWD"

echo "Output: $output"
# [*] joins the array into one word, which is what a log line wants.
echo "Inputs: ${inputs[*]}"

# -p: do not fail when the directory is left over from a previous run.
mkdir -p "$workdir/output"
#######################################
# Unpack an IMGT archive into a directory.
# The format is detected from the file content with file(1): Zip archives are
# extracted with unzip, XZ-compressed tarballs with tar.
# Arguments:
#   $1 - path to the archive
#   $2 - directory to extract into (created if it does not exist)
# Outputs:
#   unzip/tar output on stdout; diagnostics on stderr.
# Returns:
#   Exit status of the extraction command, or 1 when the target directory
#   cannot be created or the archive type is not recognised.
#######################################
function imgt_unpack {
  local imgt_zip=$1
  local outdir=$2
  local type

  mkdir -p "$outdir" || return 1

  # -b drops the "<filename>: " prefix, so the path itself can never be
  # mistaken for a type description.
  type=$(file -b "$imgt_zip")

  if [[ "$type" == *"Zip archive"* ]]; then
    unzip "$imgt_zip" -d "$outdir"
  elif [[ "$type" == *"XZ compressed data"* ]]; then
    echo "tar -xJf $imgt_zip -C $outdir"
    tar -xJf "$imgt_zip" -C "$outdir"
  else
    # Fail loudly instead of leaving an empty directory behind unnoticed.
    echo "imgt_unpack: unsupported archive type for '$imgt_zip': $type" >&2
    return 1
  fi
}
#######################################
# Append the ten IMGT result tables found under a directory to the merged
# tables in the output directory, tagging each data row with an id.
# For every table N the files matching "N_*" anywhere below the input
# directory are concatenated, lines before the start line are dropped, the id
# is appended to the second tab-separated column, and the result is appended
# to the fixed-name table file in the output directory.
# Arguments:
#   $1 - directory holding an unpacked IMGT archive
#   $2 - existing directory that receives the merged tables
#   $3 - first line to keep: 1 keeps the header row (left untagged), any
#        other value drops the lines before it and tags every remaining row
#        (default 1)
#   $4 - id appended to column 2 of every tagged row (default: empty)
# Returns:
#   Exit status of the last table's pipeline.
#######################################
function concat_imgt_files {
  local indir=$1
  local outdir=$2
  local start_line=${3:-1}
  local id=${4:-}
  local name
  local first_tagged_row=1
  local -a table_names=(
    "1_Summary.txt"
    "2_IMGT-gapped-nt-sequences.txt"
    "3_Nt-sequences.txt"
    "4_IMGT-gapped-AA-sequences.txt"
    "5_AA-sequences.txt"
    "6_Junction.txt"
    "7_V-REGION-mutation-and-AA-change-table.txt"
    "8_V-REGION-nt-mutation-statistics.txt"
    "9_V-REGION-AA-change-statistics.txt"
    "10_V-REGION-mutation-hotspots.txt"
  )

  # When the header row is kept it is the first line awk sees; skip it when
  # tagging so the column title stays unchanged.
  if [[ "$start_line" == "1" ]]; then
    first_tagged_row=2
  fi

  for name in "${table_names[@]}"; do
    # "${name%%_*}_*" turns e.g. "10_V-REGION-..." into the pattern "10_*".
    # -exec ... + runs cat only when something matched, so a missing table
    # never leaves cat waiting on stdin.
    find "$indir/" -type f -name "${name%%_*}_*" -exec cat {} + \
      | tail -n "+${start_line}" \
      | awk -F $'\t' -v id="$id" -v first="$first_tagged_row" \
          'BEGIN { OFS = FS } NR >= first { $2 = $2 id } { print }' \
      >> "$outdir/$name"
  done
}
# Main flow: unpack every input archive, append its tables to the merged
# tables in "$workdir/output", then pack that directory into "$output".
# "inputs" is a flat list: ARCHIVE_1 ID_1 ARCHIVE_2 ID_2 ...

# Nothing can be merged without at least one archive.
if (( ${#inputs[@]} == 0 )); then
  echo "Usage: $0 OUTPUT ARCHIVE ID [ARCHIVE ID ...]" >&2
  exit 1
fi

echo "Unpacking IMGT file 1.."
imgt_unpack "${inputs[0]}" "$workdir/input1"

# The first archive is concatenated from line 1, so its header rows become
# the header rows of the merged tables.
echo "Concatenating IMGT file 1..."
id=${inputs[1]}
concat_imgt_files "$workdir/input1" "$workdir/output" 1 "$id"

# Everything after the first pair is handled the same way, minus the header.
remaining_inputs=("${inputs[@]:2}")

file_number=2
for (( i = 0; i < ${#remaining_inputs[@]}; i += 2 )); do
  input="${remaining_inputs[i]}"
  id="${remaining_inputs[i + 1]}"
  current_dir="$workdir/input${file_number}"

  echo "Unpacking IMGT file ${file_number}.."
  imgt_unpack "$input" "$current_dir"

  # Start at line 2 to drop this archive's header row.
  echo "Concatenating IMGT file ${file_number}..."
  concat_imgt_files "$current_dir" "$workdir/output" 2 "$id"

  file_number=$((file_number + 1))
done

# Show the start of the merged summary in the log as a quick sanity check.
head "$workdir/output/1_Summary.txt"

echo "Creating new IMGT zip"
# tar is run from inside the output directory so the archive members carry
# no directory prefix.
cd "$workdir/output" || exit 1
if ! tar cfJ "$output" *; then
  echo "Failed to create archive: $output" >&2
  exit 1
fi

# TODO: use awk to fix the sequence numbers repeating?

echo "Done"

exit 0