Mercurial > repos > davidvanzessen > imgt_concatenate
comparison imgt_concatenate.sh @ 0:d3cf09f5a1a6 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 29 Aug 2016 05:46:28 -0400 |
parents | |
children | b360a373835f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d3cf09f5a1a6 |
---|---|
#!/bin/bash
# Concatenate several IMGT archives into one XZ-compressed tar archive.
#
# Usage: imgt_concatenate.sh OUTPUT INPUT [INPUT...]
#   OUTPUT  path of the archive to create
#   INPUT   IMGT archive to merge; archives are processed in the order given

# Absolute path of the directory containing this script.
dir="$(cd "$(dirname "$0")" && pwd)"

# Fail early with a usage message instead of running on empty arguments.
if (( $# < 2 )); then
  printf 'Usage: %s OUTPUT INPUT [INPUT...]\n' "${0##*/}" >&2
  exit 1
fi

args=("$@")
output=$1
# Everything after the first argument is an input archive.
inputs=("${args[@]:1}")

# Intermediate directories are created below the current directory.
workdir="$PWD"

echo "Output: $output"
echo "Inputs: ${inputs[*]}"

# -p tolerates a directory that already exists; stop if it cannot be created,
# because every later step writes into it.
mkdir -p "$workdir/output" || exit 1
14 | |
#######################################
# Unpack one IMGT archive into a directory.
# The archive format is detected with file(1): a "Zip archive" is extracted
# with unzip, "XZ compressed data" is extracted with tar -xJ.
# Arguments:
#   $1 - path of the archive
#   $2 - directory to extract into (created if missing)
# Outputs:
#   extractor output on stdout; a warning on stderr for unknown formats
# Returns:
#   exit status of the extractor, or 1 when the format is not recognised
#######################################
function imgt_unpack {
  local imgt_zip=$1
  local outdir=$2
  mkdir -p "$outdir"

  # Declared separately so `local` does not mask the status of file(1).
  # -b omits the file name, so the path itself can never match a pattern below.
  local type
  type="$(file -b "$imgt_zip")"

  if [[ "$type" == *"Zip archive"* ]]; then
    unzip "$imgt_zip" -d "$outdir"
  elif [[ "$type" == *"XZ compressed data"* ]]; then
    echo "tar -xJf $imgt_zip -C $outdir"
    tar -xJf "$imgt_zip" -C "$outdir"
  else
    # Previously this case was skipped silently, leaving an empty directory.
    echo "Unsupported archive type for $imgt_zip: $type" >&2
    return 1
  fi
}
30 | |
#######################################
# Append the IMGT tables found in one unpacked archive to the combined
# tables in the output directory.
# For each table, every non-directory entry below the input directory whose
# name starts with the table's numeric prefix ("1_", "2_", ... "10_") is
# concatenated, and the result from line start_line onward is appended to
# the combined table.
# Arguments:
#   $1 - directory to search (recursively)
#   $2 - directory holding the combined tables
#   $3 - first line to copy; 1 keeps the header line, 2 skips it
#######################################
function concat_imgt_files {
  local indir=$1
  local outdir=$2
  local start_line=$3
  local -a tables=(
    "1_Summary.txt"
    "2_IMGT-gapped-nt-sequences.txt"
    "3_Nt-sequences.txt"
    "4_IMGT-gapped-AA-sequences.txt"
    "5_AA-sequences.txt"
    "6_Junction.txt"
    "7_V-REGION-mutation-and-AA-change-table.txt"
    "8_V-REGION-nt-mutation-statistics.txt"
    "9_V-REGION-AA-change-statistics.txt"
    "10_V-REGION-mutation-hotspots.txt"
  )
  local table prefix

  for table in "${tables[@]}"; do
    # Numeric prefix of the table name, e.g. "10" for "10_V-REGION-...".
    prefix="${table%%_*}"
    # find starts cat itself: paths containing whitespace stay intact, and
    # cat is never run without arguments (which would make it read stdin
    # when an archive lacks this table).
    find "$indir/" ! -type d -name "${prefix}_*" -exec cat {} + \
      | tail -n "+${start_line}" >> "$outdir/$table"
  done
}
46 | |
echo "Unpacking IMGT file 1.."
imgt_unpack "${inputs[0]}" "$workdir/input1"

echo "Concatenating IMGT file 1..."
# The first archive is copied from line 1, so its header lines are kept.
concat_imgt_files "$workdir/input1" "$workdir/output" 1

remaining_inputs=("${inputs[@]:1}")

# i numbers the archives for the messages and the input directories.
i=2
for input in "${remaining_inputs[@]}"; do
  echo "Unpacking IMGT file $i.."
  current_dir="$workdir/input${i}"
  imgt_unpack "${input}" "${current_dir}"
  echo "Concatenating IMGT file $i..."
  # Later archives are copied from line 2 to skip their header line.
  concat_imgt_files "${current_dir}" "$workdir/output" 2
  i=$((i+1))
done

echo "Creating new IMGT zip"
# Stop if the directory is unreachable; otherwise tar would archive whatever
# the current directory happens to contain.
cd "$workdir/output" || exit 1
tar cfJ "$output" *

# TODO: use awk to fix the repeating sequence numbers?

echo "Done"