Source code for selectinf.learning.Rfitters

import uuid, functools, warnings

import numpy as np
from scipy.stats import norm as ndist

try:
    import rpy2.robjects as rpy
    import rpy2.robjects.numpy2ri
    rpy.r('library(splines)')
    rpy.r('library(gbm)')
    rpy.r('library(randomForest)')
except:
    warnings.warn('rpy2 not available, Rfitters will not work')

[docs]def logit_fit(T, Y, df=20): rpy2.robjects.numpy2ri.activate() rpy.r.assign('T', T) rpy.r.assign('df', df) rpy2.robjects.numpy2ri.deactivate() fitfns = [] for j in range(Y.shape[1]): y = Y[:,j].astype(np.int) rpy2.robjects.numpy2ri.activate() rpy.r.assign('Y', y) uuid_label = str(uuid.uuid1())[:8] cmd = ''' Y = as.numeric(Y) T = as.matrix(T) colnames(T) = c(%s) cur_data = data.frame(Y, T) M = glm(Y ~ %s, family=binomial(link='logit'), data=cur_data) fitfn_%s = function(t) { t = data.frame(t) colnames(t) = c(%s) return(predict(M, newdata=t, type='link')) } ''' % (', '.join(['"T%d"' % i for i in range(1, T.shape[1]+1)]), ' + '.join(['ns(T%d, df)' % i for i in range(1, T.shape[1]+1)]), uuid_label, ', '.join(['"T%d"' % i for i in range(1, T.shape[1]+1)])) rpy.r(cmd) rpy2.robjects.numpy2ri.deactivate() # this is a little fragile obviously as someone might overwrite fitfn def fitfn(t): rpy2.robjects.numpy2ri.activate() fitfn_r = rpy.r('fitfn_%s' % uuid_label) val = np.asarray(fitfn_r(t)) rpy2.robjects.numpy2ri.deactivate() result = np.zeros(t.shape[0]) test = val < 0 result[test] = np.exp(val[test]) / (1 + np.exp(val[test])) result[~test] = 1 / (1 + np.exp(-val[~test])) return result fitfns.append(fitfn) return fitfns
[docs]def probit_fit(T, Y, df=20): rpy2.robjects.numpy2ri.activate() rpy.r.assign('T', T) rpy.r.assign('df', df) fitfns = [] for j in range(Y.shape[1]): y = Y[:,j].astype(np.int) rpy2.robjects.numpy2ri.activate() rpy.r.assign('Y', y) uuid_label = str(uuid.uuid1())[:8] cmd = ''' Y = as.numeric(Y) T = as.matrix(T) colnames(T) = c(%s) cur_data = data.frame(Y, T) M = glm(Y ~ %s, family=binomial(link='probit'), data=cur_data) fitfn_%s = function(t) { t = data.frame(t) colnames(t) = c(%s) return(predict(M, newdata=t, type='link')) } ''' % (', '.join(['"T%d"' % i for i in range(1, T.shape[1]+1)]), ' + '.join(['ns(T%d, df)' % i for i in range(1, T.shape[1]+1)]), uuid_label, ', '.join(['"T%d"' % i for i in range(1, T.shape[1]+1)])) rpy.r(cmd) rpy2.robjects.numpy2ri.deactivate() # this is a little fragile obviously as someone might overwrite fitfn def fitfn(t): rpy2.robjects.numpy2ri.activate() fitfn_r = rpy.r('fitfn_%s' % uuid_label) val = np.asarray(fitfn_r(t)) rpy2.robjects.numpy2ri.deactivate() return ndist.cdf(val) fitfns.append(fitfn) return fitfns
[docs]def gbm_fit(T, Y, ntrees=5000): rpy2.robjects.numpy2ri.activate() rpy.r.assign('T', T) rpy.r.assign('n.trees', ntrees) rpy2.robjects.numpy2ri.deactivate() fitfns = [] for j in range(Y.shape[1]): y = Y[:,j].astype(np.int) rpy2.robjects.numpy2ri.activate() rpy.r.assign('Y', y) uuid_label = str(uuid.uuid1())[:8] cmd = ''' Y = as.numeric(Y) T = as.matrix(T) colnames(T) = c(%s) cur_data = data.frame(Y, T) M = gbm(Y ~ %s, distribution='bernoulli', data=cur_data, n.trees=n.trees) fitfn_%s = function(t) { t = data.frame(t) colnames(t) = colnames(T) val = predict(M, newdata=t, n.trees=n.trees) return(val) } ''' % (', '.join(['"T%d"' % i for i in range(1, T.shape[1]+1)]), ' + '.join(['T%d' % i for i in range(1, T.shape[1]+1)]), uuid_label) rpy.r(cmd) rpy2.robjects.numpy2ri.deactivate() # this is a little fragile obviously as someone might overwrite fitfn def fitfn(t): rpy2.robjects.numpy2ri.activate() fitfn_r = rpy.r('fitfn_%s' % uuid_label) val = np.asarray(fitfn_r(t)) rpy2.robjects.numpy2ri.deactivate() result = np.zeros(t.shape[0]) test = val < 0 result[test] = np.exp(val[test]) / (1 + np.exp(val[test])) result[~test] = 1 / (1 + np.exp(-val[~test])) return result fitfns.append(fitfn) return fitfns
[docs]def random_forest_fit(T, Y, ntrees=5000): rpy2.robjects.numpy2ri.activate() rpy.r.assign('T', T) rpy.r.assign('ntree', ntrees) rpy2.robjects.numpy2ri.deactivate() fitfns = [] for j in range(Y.shape[1]): y = Y[:,j].astype(np.int) rpy2.robjects.numpy2ri.activate() rpy.r.assign('Y', y) uuid_label = str(uuid.uuid1())[:8] cmd = ''' Y = as.numeric(Y) Y = as.factor(Y) T = as.matrix(T) colnames(T) = c(%s) cur_data = data.frame(Y, T) M = randomForest(Y ~ %s, data=cur_data, ntree=ntree) fitfn_%s = function(t) { t = data.frame(t) colnames(t) = colnames(T) val = predict(M, newdata=t, type='prob')[,2] # second column is for 1 return(val) } ''' % (', '.join(['"T%d"' % i for i in range(1, T.shape[1]+1)]), ' + '.join(['T%d' % i for i in range(1, T.shape[1]+1)]), uuid_label) rpy.r(cmd) rpy2.robjects.numpy2ri.deactivate() # this is a little fragile obviously as someone might overwrite fitfn def fitfn(t): rpy2.robjects.numpy2ri.activate() fitfn_r = rpy.r('fitfn_%s' % uuid_label) val = np.asarray(fitfn_r(t)) rpy2.robjects.numpy2ri.deactivate() return val fitfns.append(fitfn) return fitfns