view imgt_concatenate.sh @ 0:d3cf09f5a1a6 draft

Uploaded
author davidvanzessen
date Mon, 29 Aug 2016 05:46:28 -0400
parents
children b360a373835f
line wrap: on
line source

#!/bin/bash
# Command line, as read by the assignments below:
#   <script> OUTPUT INPUT [INPUT...]

# Absolute path of the directory containing this script.
dir="$(cd "$(dirname "$0")" && pwd)"

# Split the command line: the first word is the output path, every
# following word is an input.
args=("$@")
output=${args[0]}
inputs=("${args[@]:1}")

# Everything is staged relative to the directory the script was started in.
workdir=$PWD

printf '%s\n' "Output: $output"
printf '%s\n' "Inputs: ${inputs[*]}"

# Staging directory for the combined files.
mkdir "$workdir/output"

# Unpack one IMGT archive into a directory.
#   $1 - path to the archive; its type is detected with file(1) and must be
#        either a Zip archive or XZ-compressed data (extracted with tar)
#   $2 - directory to extract into; created if it does not exist
# Returns non-zero when the directory cannot be created, when the archive
# type is not recognised, or when the extraction command fails.
function imgt_unpack {
	local imgt_zip=$1
	local outdir=$2
	local type

	mkdir -p "$outdir" || return 1

	# -b leaves the file name out of the report, so a path that happens to
	# contain "Zip archive" or "XZ compressed data" cannot be mistaken for
	# the detected type. Assigned separately from "local" so a failure of
	# file(1) is not masked.
	type="$(file -b -- "$imgt_zip")"

	if [[ "$type" == *"Zip archive"* ]] ; then
		unzip "$imgt_zip" -d "$outdir"
	elif [[ "$type" == *"XZ compressed data"* ]] ; then
		echo "tar -xJf $imgt_zip -C $outdir"
		tar -xJf "$imgt_zip" -C "$outdir"
	else
		# Previously an unknown type was silently skipped.
		echo "Unsupported archive type for $imgt_zip: $type" >&2
		return 1
	fi
}

# Append the ten IMGT tables found under one directory to the combined
# tables in an output directory.
#   $1 - directory searched recursively for files named "<n>_*", n = 1..10
#   $2 - directory holding the combined "<n>_<name>.txt" files; each one is
#        appended to (and created if missing)
#   $3 - first line to copy from every input file: 1 copies everything,
#        2 skips a one-line header (defaults to 1 when omitted)
function concat_imgt_files {
	local indir=$1
	local outdir=$2
	local start_line=${3:-1}
	local table
	local -a tables=(
		"1_Summary.txt"
		"2_IMGT-gapped-nt-sequences.txt"
		"3_Nt-sequences.txt"
		"4_IMGT-gapped-AA-sequences.txt"
		"5_AA-sequences.txt"
		"6_Junction.txt"
		"7_V-REGION-mutation-and-AA-change-table.txt"
		"8_V-REGION-nt-mutation-statistics.txt"
		"9_V-REGION-AA-change-statistics.txt"
		"10_V-REGION-mutation-hotspots.txt"
	)

	for table in "${tables[@]}"; do
		# The text before the first "_" is the numeric prefix that selects
		# the input files. find runs tail once per match, so paths with
		# spaces are handled, every matching file has its own leading lines
		# skipped, and nothing waits on stdin when there is no match.
		find "$indir/" -type f -name "${table%%_*}_*" \
			-exec tail -n "+${start_line}" {} \; >> "$outdir/$table"
	done
}

# Main flow: unpack every input archive into its own "$workdir/input<N>"
# directory, append its tables to "$workdir/output", then pack that
# directory into "$output" as an XZ-compressed tar.
if [ "${#inputs[@]}" -eq 0 ]; then
	echo "No IMGT input files given" >&2
	exit 1
fi

i=1
for input in "${inputs[@]}"
do
	echo "Unpacking IMGT file $i.."
	current_dir="$workdir/input${i}"
	imgt_unpack "${input}" "${current_dir}"
	echo "Concatenating IMGT file $i..."
	# The first archive is copied from line 1 so its header lines are kept;
	# every later archive is copied from line 2 so its header is dropped.
	if [ "$i" -eq 1 ]; then
		first_line=1
	else
		first_line=2
	fi
	concat_imgt_files "${current_dir}" "$workdir/output" "$first_line"
	i=$((i+1))
done

echo "Creating new IMGT zip"
# Stop if the staging directory is unreachable; otherwise tar would pack
# whatever directory the script happens to be in.
cd "$workdir/output" || exit 1
tar cfJ "$output" *

# TODO: use awk to fix the sequence numbers repeating?

echo "Done"