Mercurial > repos > davidvanzessen > argalaxy_tools
view experimental_design/experimental_design.py @ 19:3ef457aa5df6 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 22 Dec 2016 03:43:02 -0500 |
parents | afe85eb6572e |
children |
line wrap: on
line source
"""Merge per-replicate result tables into one experimental-design table.

Command line (as read from ``sys.argv``)::

    SAMPLE_ID FILE [FILE ...] [SAMPLE_ID FILE ...] <ignored> OUTPUT

* ``argv[1]`` is the first sample id.
* ``argv[2:-2]`` is a mix of file paths and further sample ids.  An argument
  containing ``/`` is read as a tab-separated table belonging to the current
  sample; any other argument starts a new sample.
* ``argv[-2]`` is not used by this script.
* ``argv[-1]`` is the path the merged tab-separated table is written to.
"""
import sys

import pandas as pd

# Columns shared by every supported input table.
_COMMON_COLUMNS = [
    'ID', 'VDJ Frame', 'Top V Gene', 'Top D Gene', 'Top J Gene',
    'CDR1 Seq', 'CDR1 Length', 'CDR2 Seq', 'CDR2 Length',
    'CDR3 Seq', 'CDR3 Length', 'CDR3 Seq DNA', 'CDR3 Length DNA',
    'Strand', 'CDR3 Found How',
]

# Extra columns kept only when every input table has a "Functionality" column.
_IMGT_COLUMNS = [
    'Functionality', 'V-REGION identity %', 'V-REGION identity nt',
    'D-REGION reading frame', 'AA JUNCTION', 'Functionality comment',
    'Sequence', 'FR1-IMGT', 'FR2-IMGT', 'FR3-IMGT', 'CDR3-IMGT',
    'JUNCTION', 'J-REGION', 'FR4-IMGT', 'P3V-nt nb', 'N1-REGION-nt nb',
    'P5D-nt nb', 'P3D-nt nb', 'N2-REGION-nt nb', 'P5J-nt nb',
    '3V-REGION trimmed-nt nb', '5D-REGION trimmed-nt nb',
    '3D-REGION trimmed-nt nb', '5J-REGION trimmed-nt nb',
]

# Columns this script adds to every table before merging.
_DESIGN_COLUMNS = ['Sample', 'Replicate']


def _read_table(path):
    """Read a tab-separated table as strings, warning about and skipping bad lines."""
    try:
        return pd.read_csv(path, sep="\t", dtype=object, on_bad_lines="warn")
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines``; fall back to the
        # older spelling of the same option.
        return pd.read_csv(path, sep="\t", dtype=object, error_bad_lines=False)


def main():
    """Group the input tables by sample, tag each with sample/replicate, and merge.

    Tables with a "Functionality" column get their "VDJ Frame" overwritten
    with "In-frame with stop codon" on every row whose functionality is not
    exactly "productive".  If any table lacks that column, only the common
    columns are written.

    Raises:
        SystemExit: when no input table was given at all.
    """
    patients = {}
    files = []
    sample_id = sys.argv[1]
    blast_files = 0

    # Organize files: walk the arguments, switching sample on each bare id.
    for arg in sys.argv[2:-2]:
        if "/" not in arg:
            patients[sample_id] = files
            files = []
            sample_id = arg
            continue

        df = _read_table(arg)
        if "Functionality" in df.columns:
            # Single .loc assignment: chained indexing is not guaranteed to
            # write back into ``df``.
            not_productive = df["Functionality"] != "productive"
            df.loc[not_productive, "VDJ Frame"] = "In-frame with stop codon"
        else:
            blast_files += 1
        files.append(df)
    patients[sample_id] = files

    if blast_files != 0:
        print("Has a parsed blastn file, using limited columns.")
        columns = _COMMON_COLUMNS + _DESIGN_COLUMNS
    else:
        columns = _COMMON_COLUMNS + _IMGT_COLUMNS + _DESIGN_COLUMNS
        # ``files`` is the last sample's list here; its first table decides
        # whether the optional column is kept, placed just before N1.
        if files and "N-REGION-nt nb" in files[0].columns:
            columns.insert(columns.index("N1-REGION-nt nb"), "N-REGION-nt nb")

    frames = []
    for patient_id, samples in patients.items():
        for replicate, sample in enumerate(samples, 1):
            sample['Sample'] = patient_id
            sample['Replicate'] = str(replicate)
            frames.append(sample[columns])

    if not frames:
        sys.exit("No input files were given; nothing to merge.")

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    result = pd.concat(frames)
    result.to_csv(sys.argv[-1], sep="\t", index=False, index_label="index")


if __name__ == "__main__":
    main()