### Simple code
grad_squared = 0
while True:
    dx = compute_gradient(x)                                     # g_t: gradient at the current x
    grad_squared += dx * dx                                      # G_t += [g_t[0]^2, ..., g_t[d]^2] (elementwise)
    x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)     # per-parameter scaled step

Here compute_gradient(x) returns the gradient g_t, and grad_squared accumulates its elementwise squares: G_t += [g_t[0]^2, ..., g_t[d]^2]. Dividing the update by np.sqrt(grad_squared) gives each parameter its own effective step size: coordinates that have seen large gradients are slowed down, while rarely-updated coordinates keep larger steps; the 1e-7 term only guards against division by zero.
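To make the elementwise accumulation concrete, here is a tiny worked example (the gradient values are made up purely for illustration):

import numpy as np

# Two successive gradients for a 2-parameter model (made-up numbers).
g1 = np.array([3.0, 0.1])
g2 = np.array([4.0, 0.2])
grad_squared = g1 * g1 + g2 * g2                         # [25.0, 0.05] -- the accumulated G_t
per_param_scale = 1.0 / (np.sqrt(grad_squared) + 1e-7)
print(per_param_scale)                                   # ~[0.2, 4.47]: the coordinate with large
                                                         # gradients gets a much smaller step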
@@@AdaGrad
grad_squared = 0
while True:
    dx = compute_gradient(x)
    grad_squared += dx * dx                                      # running sum of all past squared gradients: it only grows
    x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)     # so the effective step size keeps shrinking over training
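A minimal self-contained sketch of this update on a toy 2-D quadratic; the objective, learning rate, and iteration count are illustrative choices, not from the source:

import numpy as np

def compute_gradient(x):
    # Gradient of f(x) = 0.5 * (10 * x[0]**2 + x[1]**2): one steep and one shallow direction.
    return np.array([10.0, 1.0]) * x

x = np.array([1.0, 1.0])
learning_rate = 0.1
grad_squared = np.zeros_like(x)
for _ in range(100):
    dx = compute_gradient(x)
    grad_squared += dx * dx
    x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)
print(x)   # both coordinates shrink toward the minimum at [0, 0] at comparable
           # rates, despite the 10x difference in gradient magnitude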
@@@RMSProp
grad_squared = 0
while True:
    dx = compute_gradient(x)
    grad_squared = decay_rate * grad_squared + (1 - decay_rate) * dx * dx   # leaky (exponential moving) average: assigned, not summed
    x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)
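The practical difference is in the denominator. A short sketch under an assumed constant gradient of 1.0 (an artificial setup chosen just to expose the behavior): AdaGrad's accumulator grows without bound, so its steps decay toward zero, while RMSProp's moving average saturates, so its step size stays roughly at learning_rate.

import numpy as np

decay_rate = 0.9
adagrad_acc, rmsprop_acc = 0.0, 0.0
for _ in range(1000):
    g = 1.0                                                             # constant gradient (artificial)
    adagrad_acc += g * g                                                # AdaGrad: running sum
    rmsprop_acc = decay_rate * rmsprop_acc + (1 - decay_rate) * g * g   # RMSProp: leaky average
print(np.sqrt(adagrad_acc))   # ~31.6 -> the AdaGrad step has shrunk by ~30x
print(np.sqrt(rmsprop_acc))   # ~1.0  -> the RMSProp step is still ~learning_rate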
@@@Adam (almost): Momentum + AdaGrad/RMSProp
first_moment = 0
second_moment = 0
while True:
    dx = compute_gradient(x)
    first_moment = beta1 * first_moment + (1 - beta1) * dx              # Momentum: moving average of gradients
    second_moment = beta2 * second_moment + (1 - beta2) * dx * dx       # AdaGrad/RMSProp: moving average of squared gradients
    x -= learning_rate * first_moment / (np.sqrt(second_moment) + 1e-7)
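This "almost" version takes a very large first step: both moments start at 0, and with beta2 close to 1 the second moment stays tiny for the first few iterations, so dividing by its square root inflates the update. A small worked example with assumed values beta1 = 0.9, beta2 = 0.999 and a first gradient of 1.0:

import numpy as np

beta1, beta2, dx = 0.9, 0.999, 1.0
first_moment = (1 - beta1) * dx                          # 0.1
second_moment = (1 - beta2) * dx * dx                    # 0.001
print(first_moment / (np.sqrt(second_moment) + 1e-7))    # ~3.16: without bias correction
                                                         # the first step is ~3x too large
# With the bias correction below, both moments are rescaled to ~1.0 at t = 1,
# and the first update becomes ~1.0 * learning_rate instead.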
@@@Adam (full form)
first_moment = 0
second_moment = 0
for t in range(1, num_iterations + 1):                   # t must start at 1 for the bias correction below
    dx = compute_gradient(x)
    first_moment = beta1 * first_moment + (1 - beta1) * dx
    second_moment = beta2 * second_moment + (1 - beta2) * dx * dx
    first_unbias = first_moment / (1 - beta1 ** t)        # bias correction: both moments start at 0
    second_unbias = second_moment / (1 - beta2 ** t)      # and would otherwise be underestimates early on
    x -= learning_rate * first_unbias / (np.sqrt(second_unbias) + 1e-7)
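As a quick sanity check, a self-contained sketch of the full form on the same toy quadratic used above (the objective, learning rate, and iteration count are illustrative choices; beta1 = 0.9 and beta2 = 0.999 with a learning rate around 1e-3 to 5e-4 are commonly used defaults):

import numpy as np

def compute_gradient(x):
    return np.array([10.0, 1.0]) * x                     # gradient of 0.5 * (10*x[0]**2 + x[1]**2)

x = np.array([1.0, 1.0])
learning_rate, beta1, beta2 = 1e-3, 0.9, 0.999
first_moment = np.zeros_like(x)
second_moment = np.zeros_like(x)
for t in range(1, 5001):
    dx = compute_gradient(x)
    first_moment = beta1 * first_moment + (1 - beta1) * dx
    second_moment = beta2 * second_moment + (1 - beta2) * dx * dx
    first_unbias = first_moment / (1 - beta1 ** t)
    second_unbias = second_moment / (1 - beta2 ** t)
    x -= learning_rate * first_unbias / (np.sqrt(second_unbias) + 1e-7)
print(x)   # both coordinates end close to the minimum at [0, 0]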
https://daeson.tistory.com/167