Locally weighted version/solution to HW1-1b&c in python (works for the "public" data set too), feel free to email me if have questions. jklemail@gmail.com
loaddata.py:
------------------
from numpy import array
def load(name):
''' loads data from 'data' folder, expects x,y data files to be:
non-binary data, separated by whitespace, and named:
namex.dat and namey.dat
Eg. X,Y = load('q1')
'''
xin = open('data/%sx.dat' % name)
yin = open('data/%sy.dat' % name)
Xtrain = array([line.split() for line in xin],dtype=float)
Ytrain = array([line for line in yin],dtype=float)
return Xtrain, Ytrain
plotter.py:
------------------
from matplotlib.pyplot import *
from numpy import *
import regres
reload(regres)
def plotlogr(X, y, res=50):
''' plots logistic regression, expects:
X to be m x 2 of training set
see logr() for other params
'''
clf()
hold(True)
imax, imin = amax(X,axis=0)[0], amin(X,axis=0)[0]
irange = imax-imin
X0, X1 = X[:,0], X[:,1]
x = linspace(imin, imax, res)
theta = regres.logr(X,y,x)
hx = (-theta[0]-theta[1]*x)/theta[2]
plot(X0[y<0.5],X1[y<0.5],'o')
plot(X0[y>=.5],X1[y>=.5],'x')
plot(x,hx,'r.-')
def plotlwlr(X, y, tau, res):
''' plots local weighted linear regression, expects:
res (resolution of prediction mapping) to be integer >= 2
X to be m x 2 of training set
see lwlr() for other params
'''
imax, imin = amax(X,axis=0)[0], amin(X,axis=0)[0]
irange = imax-imin
jmax, jmin = amax(X,axis=0)[1], amin(X,axis=0)[1]
jrange = jmax-jmin
hx = [[0]*res for _ in range(res)]
for i in range(res):
for j in range(res):
x = array([i*irange/(res-1)+imin, j*jrange/(res-1)+jmin])
hx[j][i] = regres.lwlr(X, y, x, tau) # new x prediction mapping "cells"
clf()
hold(True)
imshow(array(hx), origin='lower', interpolation='nearest')
X0, X1 = X[:,0], X[:,1]
iscale = res/irange
jscale = res/jrange
offset = -0.5
#map (scale) data onto 0:res by 0:res cells plot:
plot((X0[y<0.5]-imin)*iscale+offset, (X1[y<0.5]-jmin)*jscale+offset, 'o')
plot((X0[y>=.5]-imin)*iscale+offset, (X1[y>=.5]-jmin)*jscale+offset, 'x')
text(res/2-res/7, res+res/20, 'tau = %f'% tau
regres.py:
---------------------------
from numpy import *
ZERO = 1e-5
def lwlr(Xtrain, Ytrain, x, tau):
''' performs local weighted logistic regression, expects:
Xtrain to be m x n
Ytrain to be m vector
x to be n vector
tau between .01 and 10 (tau->0 = wt->inf = overfit, tau->inf = wt->1 = unweighted)
'''
m = Xtrain.shape[0]
n = Xtrain.shape[1]
theta = zeros(n)
# wt: p.15, sum(_,axis=1) = sum the n cols of (Xi-xi)^2 into a single col of m vector
wt = exp( -sum((Xtrain - tile(x,(m,1)))**2, axis=1) / (2*tau))
# Newton's method:
XT = Xtrain.T
grd = ones((n,1))
lam = 1e-4
while (linalg.norm(grd) > ZERO):
# hx: p.16, all others from public sol p.2
hx = 1. / (1 + exp(-dot(Xtrain,theta))) # m vector
grd = dot(XT, wt*(Ytrain-hx)) - lam*theta # n vector
D = -diag(wt*hx*(1-hx)) # m vector
hessian = dot(XT, dot(D,Xtrain)) -lam*eye(n) # n x n
theta = theta - dot(linalg.inv(hessian),grd) # n vector
return double(dot(x.T,theta) > 0)
def logr(Xtrain, Ytrain, x):
''' performs logistic regression, expects:
Xtrain to be m x n
Ytrain to be m vector
x to be n vector
'''
m = Xtrain.shape[0]
n = Xtrain.shape[1]+1
x = ones((m,n))
x[:,1:3] = Xtrain
theta = zeros(n)
# Newton's method:
grd = ones(n)
for i in range(20): #there must be a better way..
# hx: p.16, all others from public sol p.2
hx = 1. / (1 + exp(-dot(x,theta))) # m vector
#prvGrdNorm = grd.copy()
grd = dot(x.T, (Ytrain-hx)) # n vector
hessian = -dot(x.T**2, (hx*(1-hx))) # n vector
theta = theta - grd/hessian # n vector
return theta
Comments (0)
You don't have permission to comment on this page.