comparison imgt_concatenate.sh @ 0:d3cf09f5a1a6 draft

Uploaded
author davidvanzessen
date Mon, 29 Aug 2016 05:46:28 -0400
parents
children b360a373835f
comparison
equal deleted inserted replaced
-1:000000000000 0:d3cf09f5a1a6
#!/bin/bash
#
# Merge the tables of several IMGT archives into one XZ-compressed tarball.
#
# Usage: imgt_concatenate.sh OUTPUT INPUT [INPUT...]
#   OUTPUT  path of the tarball to create
#   INPUT   archive to merge (zip, or xz-compressed tar)
#
# Scratch directories (output/, input1/, input2/, ...) are created in the
# directory the script is started from.

# Without an output path and at least one input there is nothing to merge.
if (( $# < 2 )); then
  echo "Usage: ${0##*/} OUTPUT INPUT [INPUT...]" >&2
  exit 1
fi

# Directory that contains this script.
dir="$(cd "$(dirname "$0")" && pwd)"

# First argument is the output path, everything after it is an input archive.
args=("$@")
output=$1
inputs=("${args[@]:1}")

workdir="$PWD"

echo "Output: $output"
# [*] joins the list into one word; [@] inside a string splits the message
# into several arguments.
echo "Inputs: ${inputs[*]}"

# The merged tables are appended to, so refuse to reuse an existing directory
# whose contents would otherwise end up in the result.
if ! mkdir "$workdir/output"; then
  echo "Cannot create $workdir/output (left over from an earlier run?)" >&2
  exit 1
fi

#######################################
# Extract an archive into a directory, choosing the extractor from the
# type reported by file(1).
# Arguments:
#   $1 - path of the archive (zip, or xz-compressed tar)
#   $2 - directory to extract into; created if it does not exist
# Outputs:
#   For xz archives, echoes the tar command line to stdout.
#   Writes a message to stderr when the archive type is not recognised.
# Returns:
#   Exit status of unzip/tar, or non-zero if the directory cannot be
#   created or the archive type is not recognised.
#######################################
function imgt_unpack {
  local imgt_zip=$1
  local outdir=$2
  local file_type

  mkdir -p "$outdir" || return

  # -b omits the "name:" prefix, so the path itself can never match the
  # patterns below. Assigned separately from 'local' so a failure of
  # file(1) is not masked.
  file_type=$(file -b -- "$imgt_zip") || return

  if [[ "$file_type" == *"Zip archive"* ]]; then
    unzip "$imgt_zip" -d "$outdir"
  elif [[ "$file_type" == *"XZ compressed data"* ]]; then
    echo "tar -xJf $imgt_zip -C $outdir"
    tar -xJf "$imgt_zip" -C "$outdir"
  else
    echo "imgt_unpack: unsupported archive type for $imgt_zip: $file_type" >&2
    return 1
  fi
}

#######################################
# Append the numbered tables found under a directory to the merged tables
# in an output directory.
#
# For each table, every regular file below the input directory whose name
# starts with the table's number prefix ("1_", "2_", ... "10_") is
# concatenated and appended to the fixed-name file in the output
# directory. The line offset is applied to the concatenated stream, so
# only the leading lines of the first matching file are skipped.
# Arguments:
#   $1 - directory to search (recursively) for the tables
#   $2 - existing directory holding the merged tables
#   $3 - line number to start at, 2 to skip header (default: 1)
#######################################
function concat_imgt_files {
  local indir=$1
  local outdir=$2
  local start_line=${3:-1}
  local -a table_names=(
    "1_Summary.txt"
    "2_IMGT-gapped-nt-sequences.txt"
    "3_Nt-sequences.txt"
    "4_IMGT-gapped-AA-sequences.txt"
    "5_AA-sequences.txt"
    "6_Junction.txt"
    "7_V-REGION-mutation-and-AA-change-table.txt"
    "8_V-REGION-nt-mutation-statistics.txt"
    "9_V-REGION-AA-change-statistics.txt"
    "10_V-REGION-mutation-hotspots.txt"
  )
  local table prefix

  for table in "${table_names[@]}"; do
    # "1_Summary.txt" -> "1_"; the underscore keeps "1_" from matching "10_".
    prefix="${table%%_*}_"
    # -exec ... {} + passes paths to cat without word splitting, and does
    # not start cat at all when nothing matches (a bare cat would block
    # reading stdin).
    find "$indir/" -type f -name "${prefix}*" -exec cat {} + \
      | tail -n "+${start_line}" >> "$outdir/$table"
  done
}

# Unpack each input archive into its own directory and append its tables to
# the merged output. The first archive is copied from line 1 so its header
# rows are kept; later archives start at line 2 to drop their header row.
i=1
for input in "${inputs[@]}"; do
  echo "Unpacking IMGT file $i.."
  current_dir="$workdir/input${i}"
  if ! imgt_unpack "${input}" "${current_dir}"; then
    echo "Warning: unpacking IMGT file $i reported a problem: ${input}" >&2
  fi

  echo "Concatenating IMGT file $i..."
  if (( i == 1 )); then
    first_line=1
  else
    first_line=2
  fi
  concat_imgt_files "${current_dir}" "$workdir/output" "$first_line"
  i=$((i+1))
done

echo "Creating new IMGT zip"
# Resolve a relative output path before changing directory; otherwise the
# tarball would be written into the directory that is being archived.
if [[ "$output" != /* ]]; then
  output="$workdir/$output"
fi
# Without this check a failed cd would make tar archive the wrong directory.
if ! cd "$workdir/output"; then
  echo "Cannot enter $workdir/output" >&2
  exit 1
fi
# Archive from inside the directory so members are stored without a path
# prefix.
if ! tar -cJf "$output" *; then
  echo "Failed to create $output" >&2
  exit 1
fi

# TODO: awk to fix the sequence numbers repeating?

echo "Done"