# HG changeset patch # User fubar # Date 1691579536 0 # Node ID dd49a7040643ac8ee1f19da8ceb1ede47a55dbcb Initial commit diff -r 000000000000 -r dd49a7040643 lifelines_tool/LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/LICENSE Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Ross + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff -r 000000000000 -r dd49a7040643 lifelines_tool/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/README.md Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,35 @@ +# lifelines_tool - lifelines statistical package wrapped as a Galaxy tool. 
+ +## Galaxy tool to run failure time models using lifelines + +## Install to your Galaxy server from the toolshed - search for lifelines_tool owned by fubar2 + +### More at https://lazarus.name/demo/ + +#### Using the Rossi sample input data from lifelines, tool outputs include: + +![KM plot sample](lifelines_rossi_km.png) +and +![Schoenfeld plot sample](lifelines_rossi_schoenfeld.png) +and +![Report sample](lifelines_report.png) + + +Runs Kaplan-Meier and generates a plot. Optional grouping variable. +If 2 groups, runs a log-rank test for difference. +Plots show confidence intervals. + +If a list of covariate column names is provided, these are used in a +Cox Proportional Hazards model with tests for proportionality. + +Should work with any tabular data with the required columns - time and status for observations. + +Issues to https://github.com/fubar2/lifelines_tool please. +The tool is autogenerated, so pull requests are possibly meaningless, but regenerating a new version is easy, so please tell me what is needed. + +## Tool made with the Galaxy ToolFactory: https://github.com/fubar2/galaxy_tf_overlay +The current release includes this and a generic tabular version, and a Java .jar wrapper in a history where the generating +ToolFactory form can be recreated using the redo button. Editing the tool id will make a new tool, so all other edits to parameters can be +made and the new tool generated without destroying the original sample. 
+ + diff -r 000000000000 -r dd49a7040643 lifelines_tool/lifelines_report.png Binary file lifelines_tool/lifelines_report.png has changed diff -r 000000000000 -r dd49a7040643 lifelines_tool/lifelines_rossi_km.png Binary file lifelines_tool/lifelines_rossi_km.png has changed diff -r 000000000000 -r dd49a7040643 lifelines_tool/lifelines_rossi_schoenfeld.png Binary file lifelines_tool/lifelines_rossi_schoenfeld.png has changed diff -r 000000000000 -r dd49a7040643 lifelines_tool/lifelineskmcph.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/lifelineskmcph.xml Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,229 @@ + + + + Lifelines KM and optional Cox PH models + + pandas + matplotlib + lifelines + + + + + + + + 0: + newcols = args.header.split(',') + if len(newcols) == NCOLS: + if (args.time in newcols) and (args.status in newcols): + df.columns = newcols + else: + sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and/or status %s not found in supplied header parameter %s' % (args.time, args.status, args.header)) + sys.exit(4) + else: + sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns' % (args.header, len(newcols), NCOLS)) + sys.exit(5) +else: # no header supplied - check for a real one that matches the x and y axis column names + colsok = (args.time in testcols) and (args.status in testcols) # if they match, probably ok...should use more code and logic.. 
+ if colsok: + df.columns = testcols # use actual header + else: + colsok = (args.time in defaultcols) and (args.status in defaultcols) + if colsok: + sys.stderr.write('replacing first row of data derived header %s with %s' % (testcols, defaultcols)) + df.columns = defaultcols + else: + sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and status %s do not match anything in the file header, supplied header or automatic default column names %s' % (args.time, args.status, defaultcols)) +print('## Lifelines tool starting.\nUsing data header =', df.columns, 'time column =', args.time, 'status column =', args.status) +os.makedirs(args.image_dir, exist_ok=True) +fig, ax = plt.subplots() +if args.group > '': + names = [] + times = [] + events = [] + rmst = [] + for name, grouped_df in df.groupby(args.group): + T = grouped_df[args.time] + E = grouped_df[args.status] + gfit = kmf.fit(T, E, label=name) + kmf.plot_survival_function(ax=ax) + rst = lifelines.utils.restricted_mean_survival_time(gfit) + rmst.append(rst) + names.append(str(name)) + times.append(T) + events.append(E) + ax.set_title(args.title) + fig.savefig(os.path.join(args.image_dir,'KM_%s.png' % args.title)) + ngroup = len(names) + if ngroup == 2: # run logrank test if 2 groups + results = lifelines.statistics.logrank_test(times[0], times[1], events[0], events[1], alpha=.99) + print('Logrank test for %s - %s vs %s\n' % (args.group, names[0], names[1])) + results.print_summary() + elif ngroup > 1: + fig, ax = plt.subplots(nrows=ngroup, ncols=1, sharex=True) + for i, rst in rmst: + lifelines.plotting.rmst_plot(rst, ax=ax) + fig.savefig(os.path.join(args.image_dir,'RMST_%s.png' % args.title)) +else: + kmf.fit(df[args.time], df[args.status]) + kmf.plot_survival_function(ax=ax) + ax.set_title(args.title) + fig.savefig(os.path.join(args.image_dir,'KM_%s.png' % args.title)) +if len(args.cphcols) > 0: + fig, ax = plt.subplots() + ax.set_title('Cox PH model: %s' % args.title) + cphcols = 
args.cphcols.strip().split(',') + cphcols = [x.strip() for x in cphcols] + notfound = sum([(x not in df.columns) for x in cphcols]) + if notfound > 0: + sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): One or more requested Cox PH columns %s not found in supplied column header %s' % (args.cphcols, df.columns)) + sys.exit(6) + print('### Lifelines test of Proportional Hazards results with %s as covariates on %s' % (', '.join(cphcols), args.title)) + cphcols += [args.time, args.status] + cphdf = df[cphcols] + cph.fit(cphdf, duration_col=args.time, event_col=args.status) + cph.print_summary() + cphaxes = cph.check_assumptions(cphdf, p_value_threshold=0.01, show_plots=True) + for i, ax in enumerate(cphaxes): + figr = ax[0].get_figure() + titl = figr._suptitle.get_text().replace(' ','_').replace("'","") + oname = os.path.join(args.image_dir,'CPH%s.%s' % (titl, args.image_type)) + figr.savefig(oname) + + +#end raw]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/bts573 + + + diff -r 000000000000 -r dd49a7040643 lifelines_tool/plotlykm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/plotlykm.py Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,118 @@ +# script for a lifelines ToolFactory KM/CPH tool for Galaxy +# km models for https://github.com/galaxyproject/tools-iuc/issues/5393 +# test as +# python plotlykm.py --input_tab rossi.tab --htmlout "testfoo" --time "week" --status "arrest" --title "test" --image_dir images --cphcol="prio,age,race,paro,mar,fin" + +import argparse +import os +import sys + +import lifelines + +from matplotlib import pyplot as plt + +import pandas as pd + +# Ross Lazarus July 2023 + + +kmf = lifelines.KaplanMeierFitter() +cph = lifelines.CoxPHFitter() + +parser = argparse.ArgumentParser() +a = parser.add_argument +a('--input_tab', default='', required=True) +a('--header', default='') +a('--htmlout', default="test_run.html") +a('--group', default='') +a('--time', default='', 
required=True) +a('--status',default='', required=True) +a('--cphcols',default='') +a('--title', default='Default plot title') +a('--image_type', default='png') +a('--image_dir', default='images') +a('--readme', default='run_log.txt') +args = parser.parse_args() +sys.stdout = open(args.readme, 'w') +df = pd.read_csv(args.input_tab, sep='\t') +NCOLS = df.columns.size +NROWS = len(df.index) +defaultcols = ['col%d' % (x+1) for x in range(NCOLS)] +testcols = df.columns +if len(args.header.strip()) > 0: + newcols = args.header.split(',') + if len(newcols) == NCOLS: + if (args.time in newcols) and (args.status in newcols): + df.columns = newcols + else: + sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and/or status %s not found in supplied header parameter %s' % (args.time, args.status, args.header)) + sys.exit(4) + else: + sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns' % (args.header, len(newcols), NCOLS)) + sys.exit(5) +else: # no header supplied - check for a real one that matches the x and y axis column names + colsok = (args.time in testcols) and (args.status in testcols) # if they match, probably ok...should use more code and logic.. 
+ if colsok: + df.columns = testcols # use actual header + else: + colsok = (args.time in defaultcols) and (args.status in defaultcols) + if colsok: + sys.stderr.write('replacing first row of data derived header %s with %s' % (testcols, defaultcols)) + df.columns = defaultcols + else: + sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and status %s do not match anything in the file header, supplied header or automatic default column names %s' % (args.time, args.status, defaultcols)) +print('## Lifelines tool starting.\nUsing data header =', df.columns, 'time column =', args.time, 'status column =', args.status) +os.makedirs(args.image_dir, exist_ok=True) +fig, ax = plt.subplots() +if args.group > '': + names = [] + times = [] + events = [] + rmst = [] + for name, grouped_df in df.groupby(args.group): + T = grouped_df[args.time] + E = grouped_df[args.status] + gfit = kmf.fit(T, E, label=name) + kmf.plot_survival_function(ax=ax) + rst = lifelines.utils.restricted_mean_survival_time(gfit) + rmst.append(rst) + names.append(str(name)) + times.append(T) + events.append(E) + ngroup = len(names) + if ngroup == 2: # run logrank test if 2 groups + results = lifelines.statistics.logrank_test(times[0], times[1], events[0], events[1], alpha=.99) + print(' vs '.join(names), results) + results.print_summary() + elif ngroup > 1: + fig, ax = plt.subplots(nrows=ngroup, ncols=1, sharex=True) + for i, rst in rmst: + lifelines.plotting.rmst_plot(rst, ax=ax) + fig.savefig(os.path.join(args.image_dir,'RMST_%s.png' % args.title)) +else: + kmf.fit(df[args.time], df[args.status]) + kmf.plot_survival_function(ax=ax) +fig.savefig(os.path.join(args.image_dir,'KM_%s.png' % args.title)) +if len(args.cphcols) > 0: + fig, ax = plt.subplots() + cphcols = args.cphcols.strip().split(',') + cphcols = [x.strip() for x in cphcols] + notfound = sum([(x not in df.columns) for x in cphcols]) + if notfound > 0: + sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): One or more requested 
Cox PH columns %s not found in supplied column header %s' % (args.cphcols, df.columns)) + sys.exit(6) + print('### Lifelines test of Proportional Hazards results with %s as covariates on %s' % (', '.join(cphcols), args.title)) + cphcols += [args.time, args.status] + cphdf = df[cphcols] + cph.fit(cphdf, duration_col=args.time, event_col=args.status) + cph.print_summary() + cphaxes = cph.check_assumptions(cphdf, p_value_threshold=0.01, show_plots=True) + for i, ax in enumerate(cphaxes): + figr = ax[0].get_figure() + titl = figr._suptitle.get_text().replace(' ','_').replace("'","") + oname = os.path.join(args.image_dir,'CPH%s.%s' % (titl, args.image_type)) + figr.savefig(oname) + + + + diff -r 000000000000 -r dd49a7040643 lifelines_tool/rossi.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/rossi.tab Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,433 @@ + week arrest fin age race wexp mar paro prio +0 20 1 0 27 1 0 0 1 3 +1 17 1 0 18 1 0 0 1 8 +2 25 1 0 19 0 1 0 1 13 +3 52 0 1 23 1 1 1 1 1 +4 52 0 0 19 0 1 0 1 3 +5 52 0 0 24 1 1 0 0 2 +6 23 1 0 25 1 1 1 1 0 +7 52 0 1 21 1 1 0 1 4 +8 52 0 0 22 1 0 0 0 6 +9 52 0 0 20 1 1 0 0 0 +10 52 0 1 26 1 0 0 1 3 +11 52 0 0 40 1 1 0 0 2 +12 37 1 0 17 1 1 0 1 5 +13 52 0 0 37 1 1 0 0 2 +14 25 1 0 20 1 0 0 1 3 +15 46 1 1 22 1 1 0 1 2 +16 28 1 0 19 1 0 0 0 7 +17 52 0 0 20 1 0 0 0 2 +18 52 0 0 25 1 0 0 1 12 +19 52 0 0 24 0 1 0 1 1 +20 52 0 0 23 1 0 0 1 4 +21 52 0 1 44 1 1 1 1 0 +22 24 1 1 29 1 1 0 1 2 +23 52 0 1 28 0 1 0 1 1 +24 52 0 1 21 1 1 0 0 0 +25 52 0 1 19 1 1 0 1 2 +26 52 0 0 33 1 1 0 1 1 +27 52 0 0 19 1 0 0 0 2 +28 52 0 1 19 1 0 0 1 3 +29 52 0 1 23 1 1 1 1 9 +30 52 0 1 23 1 0 0 1 3 +31 52 0 1 19 1 0 0 1 1 +32 52 0 1 42 1 1 0 0 0 +33 52 0 0 23 1 1 1 0 2 +34 52 0 0 24 1 1 0 0 3 +35 50 1 1 20 1 1 0 1 2 +36 52 0 0 22 1 1 0 1 5 +37 52 0 0 27 1 1 0 0 2 +38 52 0 1 19 1 0 0 0 4 +39 52 0 0 28 1 1 0 1 3 +40 52 0 1 33 1 1 1 0 9 +41 52 0 0 24 1 1 0 1 1 +42 10 1 0 21 1 0 0 1 14 +43 52 0 0 22 1 0 0 1 2 +44 52 0 1 19 1 0 0 1 2 
+45 52 0 1 22 1 0 0 0 2 +46 52 0 1 22 1 0 0 0 15 +47 20 1 1 23 1 1 0 1 5 +48 52 0 0 32 1 1 1 1 2 +49 52 0 1 27 1 1 0 1 0 +50 52 0 1 36 1 1 0 0 0 +51 52 0 1 22 1 1 0 1 1 +52 52 0 1 32 1 1 0 1 1 +53 50 1 1 19 1 1 0 0 10 +54 52 0 0 28 1 1 1 1 1 +55 52 0 0 32 0 1 0 0 3 +56 52 0 0 33 1 1 1 1 1 +57 52 0 0 26 1 0 0 1 1 +58 52 0 1 20 1 1 0 1 0 +59 52 0 1 42 1 1 0 0 9 +60 6 1 0 19 1 0 0 0 6 +61 52 0 0 22 1 1 0 0 2 +62 52 0 0 22 1 0 0 1 5 +63 52 0 0 36 1 0 0 0 11 +64 52 1 0 23 1 1 0 0 2 +65 52 0 1 27 1 1 0 1 3 +66 52 0 1 21 1 0 0 1 1 +67 52 0 1 22 1 1 0 1 2 +68 49 1 0 35 1 1 0 1 3 +69 52 0 0 21 1 1 0 1 4 +70 52 0 1 25 1 1 0 1 5 +71 52 0 0 18 1 0 0 1 0 +72 52 0 1 26 1 1 0 1 2 +73 52 0 0 30 1 1 0 0 4 +74 52 0 0 20 1 0 0 1 2 +75 52 0 1 43 1 1 0 1 1 +76 43 1 0 23 1 1 1 1 4 +77 52 0 0 42 0 1 0 0 2 +78 52 0 0 21 0 0 0 0 2 +79 5 1 0 19 1 0 0 0 3 +80 27 1 0 29 1 0 0 0 4 +81 52 0 0 30 1 1 0 1 3 +82 52 0 1 21 0 1 1 1 10 +83 52 0 0 20 1 0 0 1 7 +84 22 1 1 19 1 0 0 1 10 +85 52 0 1 22 1 1 0 1 1 +86 52 0 0 25 1 0 0 1 3 +87 18 1 0 22 1 0 0 0 4 +88 52 0 1 22 1 1 0 1 4 +89 52 0 1 24 1 0 0 1 2 +90 52 0 0 39 1 1 1 1 4 +91 52 0 0 21 1 1 0 1 1 +92 52 0 1 20 1 1 0 1 2 +93 52 0 1 24 1 0 0 0 1 +94 52 0 0 25 1 1 0 1 2 +95 24 1 1 21 1 1 0 0 4 +96 52 0 1 20 1 0 0 1 1 +97 52 0 1 19 1 0 0 1 3 +98 52 0 1 24 1 0 0 0 2 +99 52 0 1 24 1 1 0 1 1 +100 2 1 0 44 1 1 0 1 2 +101 26 1 0 32 1 1 0 0 2 +102 52 0 0 23 1 1 0 0 3 +103 49 1 1 19 1 0 0 1 1 +104 52 0 0 20 1 0 0 1 1 +105 21 1 0 27 1 1 0 1 0 +106 48 1 0 19 1 0 0 0 6 +107 52 0 1 21 1 1 0 0 1 +108 52 0 0 20 1 0 0 1 1 +109 52 0 0 25 1 0 0 1 3 +110 52 0 0 20 1 0 0 1 6 +111 52 0 1 23 1 1 0 1 3 +112 52 0 0 20 1 0 0 0 2 +113 52 0 1 30 1 1 0 0 1 +114 52 0 0 25 1 1 1 1 0 +115 52 0 0 22 1 1 0 1 1 +116 52 0 0 24 1 1 0 1 3 +117 52 0 1 18 1 0 0 0 4 +118 8 1 1 40 1 1 0 1 1 +119 52 0 0 22 1 0 0 1 1 +120 52 0 1 23 1 0 0 1 6 +121 49 1 0 21 1 1 0 1 1 +122 52 0 1 24 0 1 1 1 2 +123 52 0 1 24 1 0 0 1 14 +124 52 0 0 38 1 1 0 1 2 +125 52 0 0 26 0 1 0 0 3 +126 52 0 1 29 1 1 0 0 1 
+127 52 0 0 21 1 0 0 1 8 +128 52 0 1 21 1 1 1 1 2 +129 52 0 0 22 0 0 0 1 4 +130 8 1 0 23 1 0 0 1 5 +131 52 0 0 27 1 1 0 0 2 +132 52 0 1 18 1 0 0 1 2 +133 13 1 0 23 1 0 0 0 5 +134 52 0 1 24 1 0 0 1 2 +135 52 0 1 21 1 0 0 0 3 +136 52 0 1 20 1 0 0 1 4 +137 52 0 1 27 1 1 0 0 4 +138 8 1 1 20 1 0 0 1 11 +139 52 0 1 29 1 1 1 1 5 +140 33 1 0 19 1 0 0 0 10 +141 52 0 0 20 1 0 0 0 8 +142 52 0 1 18 1 0 0 1 0 +143 11 1 1 19 1 0 0 1 2 +144 52 0 1 24 1 1 0 1 1 +145 52 0 0 28 0 1 0 1 4 +146 52 0 1 26 1 0 0 0 4 +147 52 0 1 17 1 0 0 1 0 +148 52 0 0 21 1 0 0 1 3 +149 37 1 0 34 1 1 0 0 2 +150 52 0 1 26 1 1 0 0 1 +151 52 0 1 43 1 1 0 1 2 +152 52 0 0 20 1 0 0 1 0 +153 44 1 0 20 1 1 0 1 1 +154 52 0 0 32 1 1 0 1 1 +155 52 1 0 25 0 1 0 1 1 +156 52 0 1 22 1 0 0 1 1 +157 52 0 1 31 0 1 0 1 1 +158 52 0 1 42 1 1 1 1 4 +159 52 0 1 32 1 1 0 0 10 +160 52 0 1 20 1 0 0 0 8 +161 52 0 0 20 0 0 0 0 1 +162 52 0 1 36 1 0 0 1 8 +163 52 0 1 34 1 1 0 1 2 +164 52 0 1 28 1 1 1 1 3 +165 52 0 1 21 1 1 0 1 2 +166 52 0 0 18 0 0 0 1 6 +167 52 0 1 20 0 0 0 1 4 +168 52 0 0 17 0 0 0 1 3 +169 52 0 1 44 1 0 0 1 3 +170 52 0 1 30 1 1 0 1 5 +171 52 0 1 22 1 0 0 0 11 +172 9 1 1 30 1 0 0 0 3 +173 17 1 0 23 1 0 0 0 8 +174 52 0 1 20 1 1 0 0 2 +175 52 0 0 19 1 0 0 0 10 +176 52 0 1 21 1 0 0 1 1 +177 52 0 1 22 1 0 0 1 6 +178 52 0 1 19 1 0 0 1 2 +179 52 0 1 21 1 0 0 0 10 +180 16 1 0 38 1 0 0 1 3 +181 52 0 1 24 1 0 0 0 7 +182 52 0 1 39 1 1 1 0 2 +183 3 1 0 30 1 0 0 1 3 +184 52 0 0 37 1 1 0 0 0 +185 52 0 1 23 1 0 0 1 2 +186 52 0 0 21 0 0 0 1 1 +187 52 0 1 31 1 1 0 1 1 +188 52 0 1 24 1 0 0 0 13 +189 52 0 0 31 0 1 1 1 3 +190 52 0 0 24 1 0 0 1 2 +191 52 0 1 24 1 1 0 1 1 +192 52 0 1 21 0 1 0 0 1 +193 52 0 1 22 1 0 0 0 2 +194 45 1 0 20 1 0 0 1 5 +195 52 0 1 21 1 1 0 1 0 +196 52 0 1 24 1 1 0 1 2 +197 52 0 0 25 1 1 0 1 1 +198 52 0 0 19 1 0 0 1 1 +199 52 0 0 20 0 0 0 0 2 +200 52 0 0 20 1 1 0 1 4 +201 28 1 0 24 1 1 0 0 1 +202 52 0 1 18 1 0 0 0 4 +203 16 1 1 28 1 0 0 1 5 +204 15 1 1 19 1 0 0 0 4 +205 52 0 0 19 1 1 0 0 1 +206 52 0 0 25 1 1 1 
0 0 +207 52 0 1 19 0 0 0 0 1 +208 52 0 1 25 1 0 0 0 2 +209 14 1 0 24 1 0 0 0 0 +210 52 0 1 20 1 1 0 1 1 +211 52 0 1 30 1 0 0 1 1 +212 52 0 0 29 1 1 0 1 4 +213 52 0 0 28 0 1 1 1 4 +214 52 0 1 36 1 0 0 1 1 +215 52 0 1 23 1 1 0 0 7 +216 52 0 1 23 1 0 0 0 2 +217 52 0 0 24 1 1 0 1 4 +218 52 0 0 29 1 1 1 1 1 +219 52 0 0 26 1 1 1 1 2 +220 52 0 0 39 0 1 1 0 3 +221 52 0 1 20 1 1 0 1 1 +222 52 0 0 23 1 1 0 1 3 +223 52 0 1 21 1 1 0 1 2 +224 52 0 1 21 1 0 0 1 1 +225 7 1 1 20 0 0 0 1 2 +226 52 0 1 20 0 0 0 1 3 +227 52 0 1 27 1 1 0 1 2 +228 43 1 0 18 0 1 0 0 3 +229 46 1 1 25 1 1 0 0 1 +230 40 1 1 20 1 0 0 0 6 +231 52 0 1 20 1 1 0 0 5 +232 14 1 0 20 0 0 0 0 7 +233 52 0 0 24 0 1 1 0 11 +234 52 0 1 23 1 0 0 0 1 +235 8 1 0 28 1 1 0 0 4 +236 52 0 0 21 1 0 0 0 2 +237 52 0 0 25 1 0 0 0 1 +238 52 0 0 24 1 1 0 1 1 +239 52 0 0 29 1 1 1 0 3 +240 52 0 0 22 1 1 1 1 2 +241 25 1 0 28 1 0 0 1 18 +242 52 0 0 19 1 0 0 1 1 +243 52 0 0 20 1 1 0 1 1 +244 17 1 0 20 1 0 0 1 5 +245 37 1 1 22 1 1 0 1 1 +246 52 0 0 20 1 0 0 1 8 +247 52 0 1 21 1 0 0 1 2 +248 52 0 1 21 1 1 0 1 1 +249 32 1 0 19 1 0 0 1 3 +250 52 0 0 26 1 1 0 1 1 +251 52 0 0 23 1 1 1 1 2 +252 52 0 1 22 0 1 0 1 4 +253 52 0 1 24 1 1 0 0 8 +254 52 0 1 40 1 0 0 0 5 +255 52 0 1 32 1 0 0 0 2 +256 52 0 0 38 1 1 0 0 0 +257 52 0 0 26 1 1 0 1 1 +258 12 1 1 27 1 1 0 1 0 +259 52 0 0 29 1 1 1 1 3 +260 18 1 0 20 1 1 0 1 4 +261 52 0 0 22 1 0 0 0 1 +262 52 0 0 22 0 0 0 0 5 +263 14 1 1 19 1 1 0 0 12 +264 52 0 0 22 1 1 0 0 1 +265 52 0 0 19 1 0 0 0 3 +266 52 0 1 32 1 1 0 0 1 +267 52 0 1 25 1 1 0 1 2 +268 38 1 0 21 1 0 0 1 2 +269 52 0 1 36 1 1 0 1 1 +270 24 1 0 40 1 1 0 0 2 +271 20 1 1 20 1 0 0 1 1 +272 32 1 1 19 1 0 0 1 0 +273 52 0 0 18 1 0 0 1 4 +274 52 0 1 28 1 1 0 0 0 +275 52 0 1 22 1 1 0 0 2 +276 52 0 1 25 1 0 0 1 1 +277 52 0 1 28 1 1 0 0 2 +278 52 0 1 25 1 1 0 0 2 +279 52 0 1 20 1 1 0 0 4 +280 52 0 1 24 1 0 0 0 5 +281 52 0 0 24 1 1 0 0 0 +282 52 0 1 36 0 1 0 1 2 +283 52 0 1 34 1 1 0 0 1 +284 31 1 0 19 1 1 0 1 5 +285 20 1 1 23 1 0 0 1 1 +286 40 1 0 19 1 1 
0 1 3 +287 52 0 1 40 1 1 0 0 2 +288 52 0 1 31 1 1 0 0 2 +289 52 0 0 23 1 1 1 1 0 +290 52 0 0 42 1 0 0 1 2 +291 42 1 1 26 1 1 1 1 1 +292 52 0 0 20 1 0 0 1 9 +293 26 1 0 27 1 1 0 1 1 +294 52 0 1 24 1 0 0 0 5 +295 52 0 0 25 1 0 0 0 2 +296 52 0 1 22 1 1 0 1 3 +297 52 0 1 20 1 0 0 1 2 +298 52 0 1 20 1 1 0 1 2 +299 47 1 0 22 1 0 0 1 3 +300 52 0 0 18 1 1 0 1 1 +301 52 0 0 20 1 1 0 1 2 +302 40 1 0 20 1 1 0 1 1 +303 52 0 0 22 1 1 0 1 2 +304 52 0 1 30 1 1 1 0 2 +305 52 0 0 36 0 1 0 0 1 +306 52 0 0 25 0 1 1 1 5 +307 21 1 0 29 1 0 0 1 3 +308 52 0 0 19 1 1 0 1 3 +309 52 0 1 24 1 1 0 1 2 +310 52 0 1 21 1 0 0 0 0 +311 52 0 1 35 1 1 0 1 6 +312 52 0 1 19 0 1 0 0 4 +313 1 1 0 20 1 0 0 0 0 +314 43 1 0 22 0 0 0 0 3 +315 24 1 0 23 1 1 0 0 1 +316 11 1 0 19 1 0 0 0 18 +317 52 0 0 18 1 0 0 1 3 +318 52 0 1 38 0 1 0 1 2 +319 52 0 1 18 0 0 0 1 6 +320 52 0 0 22 1 1 1 1 1 +321 33 1 0 21 1 0 0 1 3 +322 52 0 0 21 1 1 0 1 1 +323 46 1 1 21 1 0 0 1 5 +324 36 1 1 17 1 0 0 1 3 +325 52 0 1 22 1 1 0 1 1 +326 52 0 1 23 1 1 1 1 0 +327 18 1 1 19 1 0 0 1 4 +328 52 0 1 21 1 1 0 0 1 +329 52 0 1 35 1 1 0 0 5 +330 50 1 0 23 1 1 0 0 8 +331 52 0 1 22 1 0 0 0 2 +332 34 1 1 25 1 0 0 0 11 +333 52 0 1 20 1 1 0 0 4 +334 35 1 1 19 0 0 0 0 1 +335 52 0 0 20 1 0 0 0 1 +336 52 0 1 41 0 1 1 1 3 +337 39 1 0 23 0 1 0 1 4 +338 9 1 1 26 1 1 0 0 0 +339 52 0 0 26 1 0 0 0 2 +340 52 0 1 38 1 1 0 1 1 +341 52 0 0 27 1 1 0 1 1 +342 34 1 1 19 1 0 0 1 3 +343 52 0 0 25 1 1 0 0 1 +344 52 0 1 30 1 1 0 0 2 +345 52 0 1 42 1 1 0 0 1 +346 44 1 0 20 1 1 0 1 2 +347 52 0 1 23 1 1 0 1 1 +348 52 0 0 21 1 0 0 1 3 +349 35 1 1 20 1 1 0 0 3 +350 30 1 0 17 1 0 0 0 1 +351 39 1 1 26 1 0 1 0 5 +352 52 0 1 24 1 1 0 1 1 +353 52 0 0 37 1 1 1 1 1 +354 52 0 0 28 1 1 1 1 1 +355 52 0 0 33 1 1 0 1 0 +356 19 1 1 22 1 0 0 1 4 +357 52 0 0 25 1 1 1 1 2 +358 43 1 0 20 0 0 1 0 10 +359 52 0 0 20 1 0 0 0 1 +360 48 1 1 24 0 1 0 0 4 +361 37 1 1 26 0 0 0 0 11 +362 20 1 1 26 1 1 0 1 1 +363 52 0 0 25 0 1 0 1 1 +364 52 0 0 26 1 1 0 1 1 +365 36 1 1 23 1 0 0 0 3 +366 52 0 1 28 1 
0 0 1 4 +367 52 0 0 27 1 1 0 1 0 +368 52 0 1 23 1 0 0 0 3 +369 52 0 0 17 1 0 0 1 7 +370 52 0 0 20 0 0 0 1 4 +371 52 0 1 20 1 0 0 1 5 +372 52 0 1 20 0 0 0 1 9 +373 30 1 1 22 1 1 0 1 2 +374 52 0 0 31 1 0 0 1 1 +375 52 0 0 43 1 1 0 0 1 +376 52 0 0 29 1 1 0 0 1 +377 52 0 1 21 1 0 0 0 0 +378 52 0 0 24 0 1 0 0 2 +379 52 0 1 30 1 1 1 0 3 +380 52 0 0 22 1 1 0 0 0 +381 52 0 1 26 1 1 1 0 1 +382 42 1 1 20 1 1 0 0 0 +383 52 0 0 23 1 1 0 0 6 +384 52 0 0 25 1 1 0 1 5 +385 52 0 1 40 1 1 0 1 2 +386 52 0 1 30 1 1 1 1 0 +387 26 1 0 22 1 0 1 1 2 +388 40 1 0 18 1 0 0 1 2 +389 52 0 0 18 1 0 0 1 0 +390 52 0 0 24 1 1 0 1 2 +391 52 0 1 25 1 1 1 1 2 +392 35 1 1 19 1 0 0 1 2 +393 52 0 0 24 1 1 0 1 2 +394 46 1 0 24 1 0 1 1 2 +395 52 0 0 18 1 1 0 1 3 +396 49 1 1 18 1 1 0 1 0 +397 52 0 0 23 1 1 0 0 0 +398 52 0 0 20 1 0 0 1 2 +399 49 1 1 18 1 1 0 1 1 +400 52 0 0 23 1 1 0 1 2 +401 52 0 1 20 1 0 0 1 1 +402 52 0 0 23 1 0 0 0 5 +403 52 0 0 23 1 1 0 1 1 +404 52 0 0 23 1 1 1 1 2 +405 35 1 0 20 1 1 0 1 4 +406 52 0 1 26 1 1 1 0 4 +407 52 0 1 30 1 0 0 0 1 +408 52 0 1 36 0 1 0 0 4 +409 52 0 1 43 1 1 0 0 4 +410 27 1 0 20 0 1 0 0 1 +411 52 0 1 24 1 1 0 1 1 +412 52 0 0 22 1 1 0 0 1 +413 52 0 1 20 1 0 1 0 1 +414 52 1 0 21 1 0 0 0 0 +415 45 1 1 18 1 0 0 0 5 +416 4 1 0 18 1 1 0 0 1 +417 52 1 0 33 1 1 0 1 2 +418 36 1 1 19 1 0 0 1 2 +419 52 0 1 21 0 1 0 1 1 +420 52 0 1 21 1 0 0 1 1 +421 8 1 1 21 1 1 0 1 4 +422 15 1 1 22 1 0 0 1 3 +423 52 0 0 18 1 0 0 1 3 +424 19 1 0 18 1 0 0 0 2 +425 52 0 0 24 1 1 0 1 2 +426 12 1 1 22 1 1 1 1 2 +427 52 0 1 31 0 1 0 1 3 +428 52 0 0 20 1 0 0 1 1 +429 52 0 1 20 1 1 1 1 1 +430 52 0 0 29 1 1 0 1 3 +431 52 0 1 24 1 1 0 1 1 diff -r 000000000000 -r dd49a7040643 lifelines_tool/run_log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/run_log.txt Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,107 @@ +## Lifelines tool starting. 
+Using data header = Index(['Unnamed: 0', 'week', 'arrest', 'fin', 'age', 'race', 'wexp', 'mar', + 'paro', 'prio'], + dtype='object') time column = week status column = arrest +### Lifelines test of Proportional Hazards results with prio, age, race, paro, mar, fin as covariates on test + + duration col = 'week' + event col = 'arrest' + baseline estimation = breslow + number of observations = 432 +number of events observed = 114 + partial log-likelihood = -659.00 + time fit was run = 2023-08-09 00:18:43 UTC + +--- + coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95% +covariate +prio 0.10 1.10 0.03 0.04 0.15 1.04 1.16 +age -0.06 0.94 0.02 -0.10 -0.02 0.90 0.98 +race 0.32 1.38 0.31 -0.28 0.92 0.75 2.52 +paro -0.09 0.91 0.20 -0.47 0.29 0.62 1.34 +mar -0.48 0.62 0.38 -1.22 0.25 0.30 1.29 +fin -0.38 0.68 0.19 -0.75 -0.00 0.47 1.00 + + cmp to z p -log2(p) +covariate +prio 0.00 3.53 <0.005 11.26 +age 0.00 -2.95 <0.005 8.28 +race 0.00 1.04 0.30 1.75 +paro 0.00 -0.46 0.65 0.63 +mar 0.00 -1.28 0.20 2.32 +fin 0.00 -1.98 0.05 4.40 +--- +Concordance = 0.63 +Partial AIC = 1330.00 +log-likelihood ratio test = 32.77 on 6 df +-log2(p) of ll-ratio test = 16.39 + + + Bootstrapping lowess lines. May take a moment... + + + Bootstrapping lowess lines. May take a moment... + +The ``p_value_threshold`` is set at 0.01. Even under the null hypothesis of no violations, some +covariates will be below the threshold by chance. This is compounded when there are many covariates. +Similarly, when there are lots of observations, even minor deviances from the proportional hazard +assumption will be flagged. + +With that in mind, it's best to use a combination of statistical tests and visual tests to determine +the most serious violations. Produce visual plots using ``check_assumptions(..., show_plots=True)`` +and looking for non-constant lines. See link [A] below for a full example. 
+ + + null_distribution = chi squared +degrees_of_freedom = 1 + model = + test_name = proportional_hazard_test + +--- + test_statistic p -log2(p) +age km 6.99 0.01 6.93 + rank 7.40 0.01 7.26 +fin km 0.02 0.90 0.15 + rank 0.01 0.91 0.13 +mar km 1.64 0.20 2.32 + rank 1.80 0.18 2.48 +paro km 0.06 0.81 0.31 + rank 0.07 0.79 0.34 +prio km 0.92 0.34 1.57 + rank 0.88 0.35 1.52 +race km 1.70 0.19 2.38 + rank 1.68 0.19 2.36 + + +1. Variable 'age' failed the non-proportional test: p-value is 0.0065. + + Advice 1: the functional form of the variable 'age' might be incorrect. That is, there may be +non-linear terms missing. The proportional hazard test used is very sensitive to incorrect +functional forms. See documentation in link [D] below on how to specify a functional form. + + Advice 2: try binning the variable 'age' using pd.cut, and then specify it in `strata=['age', +...]` in the call in `.fit`. See documentation in link [B] below. + + Advice 3: try adding an interaction term with your time variable. See documentation in link [C] +below. + + + Bootstrapping lowess lines. May take a moment... + + + Bootstrapping lowess lines. May take a moment... + + + Bootstrapping lowess lines. May take a moment... + + + Bootstrapping lowess lines. May take a moment... 
+ + +--- +[A] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html +[B] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Bin-variable-and-stratify-on-it +[C] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Introduce-time-varying-covariates +[D] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Modify-the-functional-form +[E] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Stratification + diff -r 000000000000 -r dd49a7040643 lifelines_tool/test-data/input_tab_sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/test-data/input_tab_sample Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,433 @@ + week arrest fin age race wexp mar paro prio +0 20 1 0 27 1 0 0 1 3 +1 17 1 0 18 1 0 0 1 8 +2 25 1 0 19 0 1 0 1 13 +3 52 0 1 23 1 1 1 1 1 +4 52 0 0 19 0 1 0 1 3 +5 52 0 0 24 1 1 0 0 2 +6 23 1 0 25 1 1 1 1 0 +7 52 0 1 21 1 1 0 1 4 +8 52 0 0 22 1 0 0 0 6 +9 52 0 0 20 1 1 0 0 0 +10 52 0 1 26 1 0 0 1 3 +11 52 0 0 40 1 1 0 0 2 +12 37 1 0 17 1 1 0 1 5 +13 52 0 0 37 1 1 0 0 2 +14 25 1 0 20 1 0 0 1 3 +15 46 1 1 22 1 1 0 1 2 +16 28 1 0 19 1 0 0 0 7 +17 52 0 0 20 1 0 0 0 2 +18 52 0 0 25 1 0 0 1 12 +19 52 0 0 24 0 1 0 1 1 +20 52 0 0 23 1 0 0 1 4 +21 52 0 1 44 1 1 1 1 0 +22 24 1 1 29 1 1 0 1 2 +23 52 0 1 28 0 1 0 1 1 +24 52 0 1 21 1 1 0 0 0 +25 52 0 1 19 1 1 0 1 2 +26 52 0 0 33 1 1 0 1 1 +27 52 0 0 19 1 0 0 0 2 +28 52 0 1 19 1 0 0 1 3 +29 52 0 1 23 1 1 1 1 9 +30 52 0 1 23 1 0 0 1 3 +31 52 0 1 19 1 0 0 1 1 +32 52 0 1 42 1 1 0 0 0 +33 52 0 0 23 1 1 1 0 2 +34 52 0 0 24 1 1 0 0 3 +35 50 1 1 20 1 1 0 1 2 +36 52 0 0 22 1 1 0 1 5 +37 52 0 0 27 1 1 0 0 2 +38 52 0 1 19 1 0 0 0 4 +39 52 0 0 28 1 1 0 1 3 +40 52 0 1 33 1 1 1 0 9 +41 52 0 0 24 1 1 0 1 1 +42 10 1 0 21 1 0 0 1 14 +43 52 0 0 22 1 0 0 1 2 +44 52 0 1 19 1 0 0 1 2 +45 52 0 1 
22 1 0 0 0 2 +46 52 0 1 22 1 0 0 0 15 +47 20 1 1 23 1 1 0 1 5 +48 52 0 0 32 1 1 1 1 2 +49 52 0 1 27 1 1 0 1 0 +50 52 0 1 36 1 1 0 0 0 +51 52 0 1 22 1 1 0 1 1 +52 52 0 1 32 1 1 0 1 1 +53 50 1 1 19 1 1 0 0 10 +54 52 0 0 28 1 1 1 1 1 +55 52 0 0 32 0 1 0 0 3 +56 52 0 0 33 1 1 1 1 1 +57 52 0 0 26 1 0 0 1 1 +58 52 0 1 20 1 1 0 1 0 +59 52 0 1 42 1 1 0 0 9 +60 6 1 0 19 1 0 0 0 6 +61 52 0 0 22 1 1 0 0 2 +62 52 0 0 22 1 0 0 1 5 +63 52 0 0 36 1 0 0 0 11 +64 52 1 0 23 1 1 0 0 2 +65 52 0 1 27 1 1 0 1 3 +66 52 0 1 21 1 0 0 1 1 +67 52 0 1 22 1 1 0 1 2 +68 49 1 0 35 1 1 0 1 3 +69 52 0 0 21 1 1 0 1 4 +70 52 0 1 25 1 1 0 1 5 +71 52 0 0 18 1 0 0 1 0 +72 52 0 1 26 1 1 0 1 2 +73 52 0 0 30 1 1 0 0 4 +74 52 0 0 20 1 0 0 1 2 +75 52 0 1 43 1 1 0 1 1 +76 43 1 0 23 1 1 1 1 4 +77 52 0 0 42 0 1 0 0 2 +78 52 0 0 21 0 0 0 0 2 +79 5 1 0 19 1 0 0 0 3 +80 27 1 0 29 1 0 0 0 4 +81 52 0 0 30 1 1 0 1 3 +82 52 0 1 21 0 1 1 1 10 +83 52 0 0 20 1 0 0 1 7 +84 22 1 1 19 1 0 0 1 10 +85 52 0 1 22 1 1 0 1 1 +86 52 0 0 25 1 0 0 1 3 +87 18 1 0 22 1 0 0 0 4 +88 52 0 1 22 1 1 0 1 4 +89 52 0 1 24 1 0 0 1 2 +90 52 0 0 39 1 1 1 1 4 +91 52 0 0 21 1 1 0 1 1 +92 52 0 1 20 1 1 0 1 2 +93 52 0 1 24 1 0 0 0 1 +94 52 0 0 25 1 1 0 1 2 +95 24 1 1 21 1 1 0 0 4 +96 52 0 1 20 1 0 0 1 1 +97 52 0 1 19 1 0 0 1 3 +98 52 0 1 24 1 0 0 0 2 +99 52 0 1 24 1 1 0 1 1 +100 2 1 0 44 1 1 0 1 2 +101 26 1 0 32 1 1 0 0 2 +102 52 0 0 23 1 1 0 0 3 +103 49 1 1 19 1 0 0 1 1 +104 52 0 0 20 1 0 0 1 1 +105 21 1 0 27 1 1 0 1 0 +106 48 1 0 19 1 0 0 0 6 +107 52 0 1 21 1 1 0 0 1 +108 52 0 0 20 1 0 0 1 1 +109 52 0 0 25 1 0 0 1 3 +110 52 0 0 20 1 0 0 1 6 +111 52 0 1 23 1 1 0 1 3 +112 52 0 0 20 1 0 0 0 2 +113 52 0 1 30 1 1 0 0 1 +114 52 0 0 25 1 1 1 1 0 +115 52 0 0 22 1 1 0 1 1 +116 52 0 0 24 1 1 0 1 3 +117 52 0 1 18 1 0 0 0 4 +118 8 1 1 40 1 1 0 1 1 +119 52 0 0 22 1 0 0 1 1 +120 52 0 1 23 1 0 0 1 6 +121 49 1 0 21 1 1 0 1 1 +122 52 0 1 24 0 1 1 1 2 +123 52 0 1 24 1 0 0 1 14 +124 52 0 0 38 1 1 0 1 2 +125 52 0 0 26 0 1 0 0 3 +126 52 0 1 29 1 1 0 0 1 +127 52 0 0 
21 1 0 0 1 8 +128 52 0 1 21 1 1 1 1 2 +129 52 0 0 22 0 0 0 1 4 +130 8 1 0 23 1 0 0 1 5 +131 52 0 0 27 1 1 0 0 2 +132 52 0 1 18 1 0 0 1 2 +133 13 1 0 23 1 0 0 0 5 +134 52 0 1 24 1 0 0 1 2 +135 52 0 1 21 1 0 0 0 3 +136 52 0 1 20 1 0 0 1 4 +137 52 0 1 27 1 1 0 0 4 +138 8 1 1 20 1 0 0 1 11 +139 52 0 1 29 1 1 1 1 5 +140 33 1 0 19 1 0 0 0 10 +141 52 0 0 20 1 0 0 0 8 +142 52 0 1 18 1 0 0 1 0 +143 11 1 1 19 1 0 0 1 2 +144 52 0 1 24 1 1 0 1 1 +145 52 0 0 28 0 1 0 1 4 +146 52 0 1 26 1 0 0 0 4 +147 52 0 1 17 1 0 0 1 0 +148 52 0 0 21 1 0 0 1 3 +149 37 1 0 34 1 1 0 0 2 +150 52 0 1 26 1 1 0 0 1 +151 52 0 1 43 1 1 0 1 2 +152 52 0 0 20 1 0 0 1 0 +153 44 1 0 20 1 1 0 1 1 +154 52 0 0 32 1 1 0 1 1 +155 52 1 0 25 0 1 0 1 1 +156 52 0 1 22 1 0 0 1 1 +157 52 0 1 31 0 1 0 1 1 +158 52 0 1 42 1 1 1 1 4 +159 52 0 1 32 1 1 0 0 10 +160 52 0 1 20 1 0 0 0 8 +161 52 0 0 20 0 0 0 0 1 +162 52 0 1 36 1 0 0 1 8 +163 52 0 1 34 1 1 0 1 2 +164 52 0 1 28 1 1 1 1 3 +165 52 0 1 21 1 1 0 1 2 +166 52 0 0 18 0 0 0 1 6 +167 52 0 1 20 0 0 0 1 4 +168 52 0 0 17 0 0 0 1 3 +169 52 0 1 44 1 0 0 1 3 +170 52 0 1 30 1 1 0 1 5 +171 52 0 1 22 1 0 0 0 11 +172 9 1 1 30 1 0 0 0 3 +173 17 1 0 23 1 0 0 0 8 +174 52 0 1 20 1 1 0 0 2 +175 52 0 0 19 1 0 0 0 10 +176 52 0 1 21 1 0 0 1 1 +177 52 0 1 22 1 0 0 1 6 +178 52 0 1 19 1 0 0 1 2 +179 52 0 1 21 1 0 0 0 10 +180 16 1 0 38 1 0 0 1 3 +181 52 0 1 24 1 0 0 0 7 +182 52 0 1 39 1 1 1 0 2 +183 3 1 0 30 1 0 0 1 3 +184 52 0 0 37 1 1 0 0 0 +185 52 0 1 23 1 0 0 1 2 +186 52 0 0 21 0 0 0 1 1 +187 52 0 1 31 1 1 0 1 1 +188 52 0 1 24 1 0 0 0 13 +189 52 0 0 31 0 1 1 1 3 +190 52 0 0 24 1 0 0 1 2 +191 52 0 1 24 1 1 0 1 1 +192 52 0 1 21 0 1 0 0 1 +193 52 0 1 22 1 0 0 0 2 +194 45 1 0 20 1 0 0 1 5 +195 52 0 1 21 1 1 0 1 0 +196 52 0 1 24 1 1 0 1 2 +197 52 0 0 25 1 1 0 1 1 +198 52 0 0 19 1 0 0 1 1 +199 52 0 0 20 0 0 0 0 2 +200 52 0 0 20 1 1 0 1 4 +201 28 1 0 24 1 1 0 0 1 +202 52 0 1 18 1 0 0 0 4 +203 16 1 1 28 1 0 0 1 5 +204 15 1 1 19 1 0 0 0 4 +205 52 0 0 19 1 1 0 0 1 +206 52 0 0 25 1 1 1 0 0 +207 52 
0 1 19 0 0 0 0 1 +208 52 0 1 25 1 0 0 0 2 +209 14 1 0 24 1 0 0 0 0 +210 52 0 1 20 1 1 0 1 1 +211 52 0 1 30 1 0 0 1 1 +212 52 0 0 29 1 1 0 1 4 +213 52 0 0 28 0 1 1 1 4 +214 52 0 1 36 1 0 0 1 1 +215 52 0 1 23 1 1 0 0 7 +216 52 0 1 23 1 0 0 0 2 +217 52 0 0 24 1 1 0 1 4 +218 52 0 0 29 1 1 1 1 1 +219 52 0 0 26 1 1 1 1 2 +220 52 0 0 39 0 1 1 0 3 +221 52 0 1 20 1 1 0 1 1 +222 52 0 0 23 1 1 0 1 3 +223 52 0 1 21 1 1 0 1 2 +224 52 0 1 21 1 0 0 1 1 +225 7 1 1 20 0 0 0 1 2 +226 52 0 1 20 0 0 0 1 3 +227 52 0 1 27 1 1 0 1 2 +228 43 1 0 18 0 1 0 0 3 +229 46 1 1 25 1 1 0 0 1 +230 40 1 1 20 1 0 0 0 6 +231 52 0 1 20 1 1 0 0 5 +232 14 1 0 20 0 0 0 0 7 +233 52 0 0 24 0 1 1 0 11 +234 52 0 1 23 1 0 0 0 1 +235 8 1 0 28 1 1 0 0 4 +236 52 0 0 21 1 0 0 0 2 +237 52 0 0 25 1 0 0 0 1 +238 52 0 0 24 1 1 0 1 1 +239 52 0 0 29 1 1 1 0 3 +240 52 0 0 22 1 1 1 1 2 +241 25 1 0 28 1 0 0 1 18 +242 52 0 0 19 1 0 0 1 1 +243 52 0 0 20 1 1 0 1 1 +244 17 1 0 20 1 0 0 1 5 +245 37 1 1 22 1 1 0 1 1 +246 52 0 0 20 1 0 0 1 8 +247 52 0 1 21 1 0 0 1 2 +248 52 0 1 21 1 1 0 1 1 +249 32 1 0 19 1 0 0 1 3 +250 52 0 0 26 1 1 0 1 1 +251 52 0 0 23 1 1 1 1 2 +252 52 0 1 22 0 1 0 1 4 +253 52 0 1 24 1 1 0 0 8 +254 52 0 1 40 1 0 0 0 5 +255 52 0 1 32 1 0 0 0 2 +256 52 0 0 38 1 1 0 0 0 +257 52 0 0 26 1 1 0 1 1 +258 12 1 1 27 1 1 0 1 0 +259 52 0 0 29 1 1 1 1 3 +260 18 1 0 20 1 1 0 1 4 +261 52 0 0 22 1 0 0 0 1 +262 52 0 0 22 0 0 0 0 5 +263 14 1 1 19 1 1 0 0 12 +264 52 0 0 22 1 1 0 0 1 +265 52 0 0 19 1 0 0 0 3 +266 52 0 1 32 1 1 0 0 1 +267 52 0 1 25 1 1 0 1 2 +268 38 1 0 21 1 0 0 1 2 +269 52 0 1 36 1 1 0 1 1 +270 24 1 0 40 1 1 0 0 2 +271 20 1 1 20 1 0 0 1 1 +272 32 1 1 19 1 0 0 1 0 +273 52 0 0 18 1 0 0 1 4 +274 52 0 1 28 1 1 0 0 0 +275 52 0 1 22 1 1 0 0 2 +276 52 0 1 25 1 0 0 1 1 +277 52 0 1 28 1 1 0 0 2 +278 52 0 1 25 1 1 0 0 2 +279 52 0 1 20 1 1 0 0 4 +280 52 0 1 24 1 0 0 0 5 +281 52 0 0 24 1 1 0 0 0 +282 52 0 1 36 0 1 0 1 2 +283 52 0 1 34 1 1 0 0 1 +284 31 1 0 19 1 1 0 1 5 +285 20 1 1 23 1 0 0 1 1 +286 40 1 0 19 1 1 0 1 3 +287 
52 0 1 40 1 1 0 0 2 +288 52 0 1 31 1 1 0 0 2 +289 52 0 0 23 1 1 1 1 0 +290 52 0 0 42 1 0 0 1 2 +291 42 1 1 26 1 1 1 1 1 +292 52 0 0 20 1 0 0 1 9 +293 26 1 0 27 1 1 0 1 1 +294 52 0 1 24 1 0 0 0 5 +295 52 0 0 25 1 0 0 0 2 +296 52 0 1 22 1 1 0 1 3 +297 52 0 1 20 1 0 0 1 2 +298 52 0 1 20 1 1 0 1 2 +299 47 1 0 22 1 0 0 1 3 +300 52 0 0 18 1 1 0 1 1 +301 52 0 0 20 1 1 0 1 2 +302 40 1 0 20 1 1 0 1 1 +303 52 0 0 22 1 1 0 1 2 +304 52 0 1 30 1 1 1 0 2 +305 52 0 0 36 0 1 0 0 1 +306 52 0 0 25 0 1 1 1 5 +307 21 1 0 29 1 0 0 1 3 +308 52 0 0 19 1 1 0 1 3 +309 52 0 1 24 1 1 0 1 2 +310 52 0 1 21 1 0 0 0 0 +311 52 0 1 35 1 1 0 1 6 +312 52 0 1 19 0 1 0 0 4 +313 1 1 0 20 1 0 0 0 0 +314 43 1 0 22 0 0 0 0 3 +315 24 1 0 23 1 1 0 0 1 +316 11 1 0 19 1 0 0 0 18 +317 52 0 0 18 1 0 0 1 3 +318 52 0 1 38 0 1 0 1 2 +319 52 0 1 18 0 0 0 1 6 +320 52 0 0 22 1 1 1 1 1 +321 33 1 0 21 1 0 0 1 3 +322 52 0 0 21 1 1 0 1 1 +323 46 1 1 21 1 0 0 1 5 +324 36 1 1 17 1 0 0 1 3 +325 52 0 1 22 1 1 0 1 1 +326 52 0 1 23 1 1 1 1 0 +327 18 1 1 19 1 0 0 1 4 +328 52 0 1 21 1 1 0 0 1 +329 52 0 1 35 1 1 0 0 5 +330 50 1 0 23 1 1 0 0 8 +331 52 0 1 22 1 0 0 0 2 +332 34 1 1 25 1 0 0 0 11 +333 52 0 1 20 1 1 0 0 4 +334 35 1 1 19 0 0 0 0 1 +335 52 0 0 20 1 0 0 0 1 +336 52 0 1 41 0 1 1 1 3 +337 39 1 0 23 0 1 0 1 4 +338 9 1 1 26 1 1 0 0 0 +339 52 0 0 26 1 0 0 0 2 +340 52 0 1 38 1 1 0 1 1 +341 52 0 0 27 1 1 0 1 1 +342 34 1 1 19 1 0 0 1 3 +343 52 0 0 25 1 1 0 0 1 +344 52 0 1 30 1 1 0 0 2 +345 52 0 1 42 1 1 0 0 1 +346 44 1 0 20 1 1 0 1 2 +347 52 0 1 23 1 1 0 1 1 +348 52 0 0 21 1 0 0 1 3 +349 35 1 1 20 1 1 0 0 3 +350 30 1 0 17 1 0 0 0 1 +351 39 1 1 26 1 0 1 0 5 +352 52 0 1 24 1 1 0 1 1 +353 52 0 0 37 1 1 1 1 1 +354 52 0 0 28 1 1 1 1 1 +355 52 0 0 33 1 1 0 1 0 +356 19 1 1 22 1 0 0 1 4 +357 52 0 0 25 1 1 1 1 2 +358 43 1 0 20 0 0 1 0 10 +359 52 0 0 20 1 0 0 0 1 +360 48 1 1 24 0 1 0 0 4 +361 37 1 1 26 0 0 0 0 11 +362 20 1 1 26 1 1 0 1 1 +363 52 0 0 25 0 1 0 1 1 +364 52 0 0 26 1 1 0 1 1 +365 36 1 1 23 1 0 0 0 3 +366 52 0 1 28 1 0 0 1 4 
+367 52 0 0 27 1 1 0 1 0 +368 52 0 1 23 1 0 0 0 3 +369 52 0 0 17 1 0 0 1 7 +370 52 0 0 20 0 0 0 1 4 +371 52 0 1 20 1 0 0 1 5 +372 52 0 1 20 0 0 0 1 9 +373 30 1 1 22 1 1 0 1 2 +374 52 0 0 31 1 0 0 1 1 +375 52 0 0 43 1 1 0 0 1 +376 52 0 0 29 1 1 0 0 1 +377 52 0 1 21 1 0 0 0 0 +378 52 0 0 24 0 1 0 0 2 +379 52 0 1 30 1 1 1 0 3 +380 52 0 0 22 1 1 0 0 0 +381 52 0 1 26 1 1 1 0 1 +382 42 1 1 20 1 1 0 0 0 +383 52 0 0 23 1 1 0 0 6 +384 52 0 0 25 1 1 0 1 5 +385 52 0 1 40 1 1 0 1 2 +386 52 0 1 30 1 1 1 1 0 +387 26 1 0 22 1 0 1 1 2 +388 40 1 0 18 1 0 0 1 2 +389 52 0 0 18 1 0 0 1 0 +390 52 0 0 24 1 1 0 1 2 +391 52 0 1 25 1 1 1 1 2 +392 35 1 1 19 1 0 0 1 2 +393 52 0 0 24 1 1 0 1 2 +394 46 1 0 24 1 0 1 1 2 +395 52 0 0 18 1 1 0 1 3 +396 49 1 1 18 1 1 0 1 0 +397 52 0 0 23 1 1 0 0 0 +398 52 0 0 20 1 0 0 1 2 +399 49 1 1 18 1 1 0 1 1 +400 52 0 0 23 1 1 0 1 2 +401 52 0 1 20 1 0 0 1 1 +402 52 0 0 23 1 0 0 0 5 +403 52 0 0 23 1 1 0 1 1 +404 52 0 0 23 1 1 1 1 2 +405 35 1 0 20 1 1 0 1 4 +406 52 0 1 26 1 1 1 0 4 +407 52 0 1 30 1 0 0 0 1 +408 52 0 1 36 0 1 0 0 4 +409 52 0 1 43 1 1 0 0 4 +410 27 1 0 20 0 1 0 0 1 +411 52 0 1 24 1 1 0 1 1 +412 52 0 0 22 1 1 0 0 1 +413 52 0 1 20 1 0 1 0 1 +414 52 1 0 21 1 0 0 0 0 +415 45 1 1 18 1 0 0 0 5 +416 4 1 0 18 1 1 0 0 1 +417 52 1 0 33 1 1 0 1 2 +418 36 1 1 19 1 0 0 1 2 +419 52 0 1 21 0 1 0 1 1 +420 52 0 1 21 1 0 0 1 1 +421 8 1 1 21 1 1 0 1 4 +422 15 1 1 22 1 0 0 1 3 +423 52 0 0 18 1 0 0 1 3 +424 19 1 0 18 1 0 0 0 2 +425 52 0 0 24 1 1 0 1 2 +426 12 1 1 22 1 1 1 1 2 +427 52 0 1 31 0 1 0 1 3 +428 52 0 0 20 1 0 0 1 1 +429 52 0 1 20 1 1 1 1 1 +430 52 0 0 29 1 1 0 1 3 +431 52 0 1 24 1 1 0 1 1 diff -r 000000000000 -r dd49a7040643 lifelines_tool/test-data/readme_sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/test-data/readme_sample Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,119 @@ +## Lifelines tool starting. 
+Using data header = Index(['Unnamed: 0', 'week', 'arrest', 'fin', 'age', 'race', 'wexp', 'mar', + 'paro', 'prio'], + dtype='object') time column = week status column = arrest +Logrank test for race - 0 vs 1 + + + t_0 = -1 + null_distribution = chi squared +degrees_of_freedom = 1 + alpha = 0.99 + test_name = logrank_test + +--- + test_statistic p -log2(p) + 0.58 0.45 1.16 +### Lifelines test of Proportional Hazards results with prio, age, race, paro, mar, fin as covariates on KM and CPH in lifelines test + + duration col = 'week' + event col = 'arrest' + baseline estimation = breslow + number of observations = 432 +number of events observed = 114 + partial log-likelihood = -659.00 + time fit was run = 2023-08-09 07:43:37 UTC + +--- + coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95% +covariate +prio 0.10 1.10 0.03 0.04 0.15 1.04 1.16 +age -0.06 0.94 0.02 -0.10 -0.02 0.90 0.98 +race 0.32 1.38 0.31 -0.28 0.92 0.75 2.52 +paro -0.09 0.91 0.20 -0.47 0.29 0.62 1.34 +mar -0.48 0.62 0.38 -1.22 0.25 0.30 1.29 +fin -0.38 0.68 0.19 -0.75 -0.00 0.47 1.00 + + cmp to z p -log2(p) +covariate +prio 0.00 3.53 <0.005 11.26 +age 0.00 -2.95 <0.005 8.28 +race 0.00 1.04 0.30 1.75 +paro 0.00 -0.46 0.65 0.63 +mar 0.00 -1.28 0.20 2.32 +fin 0.00 -1.98 0.05 4.40 +--- +Concordance = 0.63 +Partial AIC = 1330.00 +log-likelihood ratio test = 32.77 on 6 df +-log2(p) of ll-ratio test = 16.39 + + + Bootstrapping lowess lines. May take a moment... + + + Bootstrapping lowess lines. May take a moment... + +The ``p_value_threshold`` is set at 0.01. Even under the null hypothesis of no violations, some +covariates will be below the threshold by chance. This is compounded when there are many covariates. +Similarly, when there are lots of observations, even minor deviances from the proportional hazard +assumption will be flagged. + +With that in mind, it's best to use a combination of statistical tests and visual tests to determine +the most serious violations. 
Produce visual plots using ``check_assumptions(..., show_plots=True)`` +and looking for non-constant lines. See link [A] below for a full example. + + + null_distribution = chi squared +degrees_of_freedom = 1 + model = + test_name = proportional_hazard_test + +--- + test_statistic p -log2(p) +age km 6.99 0.01 6.93 + rank 7.40 0.01 7.26 +fin km 0.02 0.90 0.15 + rank 0.01 0.91 0.13 +mar km 1.64 0.20 2.32 + rank 1.80 0.18 2.48 +paro km 0.06 0.81 0.31 + rank 0.07 0.79 0.34 +prio km 0.92 0.34 1.57 + rank 0.88 0.35 1.52 +race km 1.70 0.19 2.38 + rank 1.68 0.19 2.36 + + +1. Variable 'age' failed the non-proportional test: p-value is 0.0065. + + Advice 1: the functional form of the variable 'age' might be incorrect. That is, there may be +non-linear terms missing. The proportional hazard test used is very sensitive to incorrect +functional forms. See documentation in link [D] below on how to specify a functional form. + + Advice 2: try binning the variable 'age' using pd.cut, and then specify it in `strata=['age', +...]` in the call in `.fit`. See documentation in link [B] below. + + Advice 3: try adding an interaction term with your time variable. See documentation in link [C] +below. + + + Bootstrapping lowess lines. May take a moment... + + + Bootstrapping lowess lines. May take a moment... + + + Bootstrapping lowess lines. May take a moment... + + + Bootstrapping lowess lines. May take a moment... 
+ + +--- +[A] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html +[B] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Bin-variable-and-stratify-on-it +[C] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Introduce-time-varying-covariates +[D] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Modify-the-functional-form +[E] https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Stratification + diff -r 000000000000 -r dd49a7040643 lifelines_tool/tongue.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lifelines_tool/tongue.tab Wed Aug 09 11:12:16 2023 +0000 @@ -0,0 +1,81 @@ +type time delta +1 1 1 +1 3 1 +1 3 1 +1 4 1 +1 10 1 +1 13 1 +1 13 1 +1 16 1 +1 16 1 +1 24 1 +1 26 1 +1 27 1 +1 28 1 +1 30 1 +1 30 1 +1 32 1 +1 41 1 +1 51 1 +1 65 1 +1 67 1 +1 70 1 +1 72 1 +1 73 1 +1 77 1 +1 91 1 +1 93 1 +1 96 1 +1 100 1 +1 104 1 +1 157 1 +1 167 1 +1 61 0 +1 74 0 +1 79 0 +1 80 0 +1 81 0 +1 87 0 +1 87 0 +1 88 0 +1 89 0 +1 93 0 +1 97 0 +1 101 0 +1 104 0 +1 108 0 +1 109 0 +1 120 0 +1 131 0 +1 150 0 +1 231 0 +1 240 0 +1 400 0 +2 1 1 +2 3 1 +2 4 1 +2 5 1 +2 5 1 +2 8 1 +2 12 1 +2 13 1 +2 18 1 +2 23 1 +2 26 1 +2 27 1 +2 30 1 +2 42 1 +2 56 1 +2 62 1 +2 69 1 +2 104 1 +2 104 1 +2 112 1 +2 129 1 +2 181 1 +2 8 0 +2 67 0 +2 76 0 +2 104 0 +2 176 0 +2 231 0