Neural_Network/NewtonRaphson at master · Jorghi12/Neural_Network · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#Implementation of Newton Raphson algorithm

#Locally weighted logistic regression utilizing Newton Raphson algorithm for optimization
#X - The training input
#Y - The training label
#x - The query point
#tau - The weight bandwidth parameter
library(ggplot2)

magnitude <- function(x) sqrt(sum(x^2))

ComputeWi <- function(x,x_q,tau){
  weight <- exp(-magnitude(x - x_q)^2/(2*tau^2))
  return (weight)
}

NewtonRaphson_Update <- function(weight, H, J_grad){
  #solve(H) returns the inverse of H
  weight <- weight - solve(H) %*% J_grad
  return (weight)
}

ComputeH <- function(X,D,tau){
  H <- t(X) %*% (D %*% X) - tau*diag(ncol(X))
  return (H)
}

Logis <- function(xi,theta){
  return (1/(1 + exp(-t(theta)%*%xi)))
}

ComputeD <- function(X,theta, Wis){
  D <- diag(nrow(X))
  diagonals <- apply(X, 1, function(x_row) Logis(x_row,theta)*(1 - Logis(x_row,theta)))
  diag(D) <- - Wis * diagonals
  return (D)
}

ComputeL_grad <- function(X,Y,Wis,theta,tau){
  LogisAll <- apply(X, 1, function(x_row) Logis(x_row,theta))
  z <- Wis * (Y - LogisAll)
  L <- t(X) %*% z - tau*theta
  return (L)
}

LocalWeightLR <- function(X, Y, x_query, tau = .01, theta = c(0,0)){
  #Create theta vector initialized to zero above

  #Compute the weights for each training example with respect to our query point x_query
  Wis <- apply(X, 1, function(x_row) ComputeWi(x_row,x_query,tau))

  #Compute the Diagonal Matrix of H = (X^T)(D)(X) - tau*I
  D <- ComputeD(X,theta,Wis)

  #Compute the Hessian matrix H
  H <- ComputeH(X,D,tau)

  #Compute the gradient of the likelihood function with respect to theta
  L_grad <- ComputeL_grad(X,Y,Wis,theta,tau)

  #Perform a Newton Raphson Update on the parameteres
  theta <- NewtonRaphson_Update(theta, H, L_grad)

  return (theta)
}

PredictY <- function(x_query,iterations=1000,tau=.01, theta = c(0,0)){
  Expected_theta <- ExecuteIterations(x_query,iterations,tau, theta)
  return (Logis(x_query, Expected_theta) >= .5)
}

ExecuteIterations <- function(x_query,iterations=1000,tau=.01, theta = c(0,0)){
  for (i in 1:iterations){
    theta <- LocalWeightLR(X,Y,x_query,tau,theta)
  }
  return (theta)
}

TestingError <- function(X,Y,iterations=1000,tau=.01, theta = c(0,0)){
  results <- apply(X, 1, function(x_row) PredictY(x_row,iterations,tau, theta))
  total_error <- abs(Y - results)

  return (total_error)
}

DataLikelihood <- function(X,Y,Wis,theta,tau){
  Regularizer <- -tau/2 * (t(theta)%*%theta)
  Variance <- 0
  for (i in 1:nrow(X)){
    Variance <- Variance + Wis[i,]*(Y[i,]*log(Logis(X[i,],theta)) + (1 - Y[i,])*(log(1-Logis(X[i,],theta))))
  }

  Likelihood <- Regularizer + Variance
  return (Likelihood)
}

X <- read.table("q2\\data\\x.dat",header=F)
Y <- read.table("q2\\data\\y.dat",header=F)

X <- data.matrix(X)
Y <- data.matrix(Y)


#library(ggplot2)
#Correct predictions are blue
#Make sure to play around with theta!
total_error <- TestingError(X,Y,iterations=5,tau=.05, c(0,0))
df <- data.frame(X1 = X[,1],X2 = X[,2], Colr = ifelse(total_error[,] == 0,"#3399FF","#FF0000"))
ggplot(df,aes(x = X1, y = X2, colour = Colr)) + geom_point(size=5) +
  scale_colour_identity(guide="legend",breaks=c("#3399FF","#FF0000"))

print(c("Total error of:",sum(total_error)/nrow(Y),"%"))
#Results from TestError with 5 iterations
#Values of tau:
#tau = .01   --- Errors = 0
#tau = .05   --- Errors = 0
#tau = .10   --- Errors = 0
#tau = .50   --- Errors = 15
#tau = 1.0   --- Errors = 17
#tau = 5.0   --- Errors = 17
#tau approach inf --- Errors = 16

#Low tau causes overfitting