Reinforcement learning: exploring reinforcement learning through the evolution of the Q-Learner
| Create Date: August 21, 2019 at 06:00 PM         | Tag: MACHINE LEARNING         | Author Name: Sun, Charles |
Evan Hennis @ Google user group
Environment setup
TensorFlow 2.0.0 beta1
Jupyter Notebooks
Bellman's Equation
Value iteration
- instead of the optimal policy we find the maximum value
- value function
Policy Iteration
- update the policy directly
Q-Learning
deep Q-Network
google colab
New Comment
Data analytics
| Create Date: June 21, 2018 at 11:41 PM         | Tag: DATA MINING         | Author Name: Sun, Charles |
Four areas of product development:
- Engagement
- growth
- utility
- core business
skills:
- product/business sense
- visualizing and communicating data effectively
- applied statistics
- experimentation/testing
- resourceful
- focused on results
technologies:
- hive/hadoop
- mysql/oracle
- python/php
- excel/R
- presto, scuba (open source)
Typical questions:
- what happens when everyone in the world is on a smartphone?
- what should we build next?
- how does seasonality affect usage?
- what types of people use this feature?
- why are people posting 5% more status updates this week?
- should we launch this new feature?
- interpreting the data with scientific rigor
- understanding what the metrics tell us about people’s experiences
- making trade-offs with conflicting results
- can we identify “bad” content by how people engage with it?
Recommender Systems Python 3 code
| Create Date: April 27, 2018 at 11:49 PM         | Tag: MACHINE LEARNING         | Author Name: Sun, Charles |
class:
'''
Created on Apr 26, 2018
@author: charles
'''
import numpy as np
class Recommender:
    """Collaborative-filtering helpers: cost, gradients, gradient check, data utilities.

    Model parameters travel as one flat vector: the first
    ``num_movies * num_features`` entries are the movie-feature matrix ``X``
    (reshaped row-major), the remaining entries are the user-parameter matrix
    ``theta`` of shape ``(num_users, num_features)``.
    """

    def __init__(self):
        pass

    @staticmethod
    def _unpack_params(params, num_users, num_movies, num_features):
        """Split the flat ``params`` vector into the ``(X, theta)`` matrices."""
        split = num_movies * num_features
        X = params[:split].reshape(num_movies, num_features)
        theta = params[split:].reshape(num_users, num_features)
        return X, theta

    @staticmethod
    def _masked_error(X, theta, Y, R):
        """Return the prediction error ``X.theta^T - Y`` restricted to entries where R is set."""
        return X.dot(theta.T) * R - Y * R

    def CheckCostFunction(self, Lambda=None):
        """Compare the analytical gradient with a numerical one on a small random problem.

        Args:
            Lambda: regularisation strength; ``None`` is treated as 0.

        Returns:
            The relative difference
            ``norm(numgrad - grad) / norm(numgrad + grad)``, which is also
            printed.
        """
        if Lambda is None:
            Lambda = 0

        # Build a small synthetic ratings matrix from random factors.
        X_t = np.random.rand(4, 3)
        theta_t = np.random.rand(5, 3)
        Y = X_t.dot(theta_t.T)
        # Drop individual ratings at random. The mask must have Y's full
        # shape: a 1-D mask of length Y.shape[0] would blank entire rows.
        Y[np.random.rand(*Y.shape) > 0.5] = 0
        R = np.zeros(Y.shape)
        R[Y != 0] = 1

        # Random starting point at which the two gradients are compared.
        X = np.random.randn(*X_t.shape)
        theta = np.random.randn(*theta_t.shape)
        num_movies, num_users = Y.shape
        num_features = theta_t.shape[1]
        params = np.append(X.flatten(), theta.flatten(), 0)

        def J(t):
            return self.CofiCostFunc(t, Y, R, num_users, num_movies,
                                     num_features, Lambda)

        numgrad = self.ComputeNumericalGradient(J, params)
        cost, grad = self.CofiCostFunc(params, Y, R, num_users, num_movies,
                                       num_features, Lambda)

        print(numgrad, grad)
        print('The above two columns you get should be very similar.')
        print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')
        diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
        print('If your backpropagation implementation is correct, then '
              'the relative difference will be small (less than 1e-9).'
              'Relative Difference: ', diff)
        return diff

    def CofiCost(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return the regularised squared-error cost for the flat ``params`` vector.

        Only entries of ``Y`` whose mask ``R`` is non-zero contribute to the
        error term; ``Lambda`` scales the L2 penalty on ``X`` and ``theta``.
        """
        X, theta = self._unpack_params(params, num_users, num_movies, num_features)
        error = self._masked_error(X, theta, Y, R)
        return 0.5 * np.sum(error ** 2) + \
            Lambda / 2 * (np.sum(theta ** 2) + np.sum(X ** 2))

    def CofiGradient(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return the flat gradient of :meth:`CofiCost` with respect to ``params``.

        The result has the same layout as ``params``: the gradient for ``X``
        first, then the gradient for ``theta``.
        """
        X, theta = self._unpack_params(params, num_users, num_movies, num_features)
        error = self._masked_error(X, theta, Y, R)
        # Both halves must be filled in; leaving X_grad at zero would freeze
        # the movie features during optimisation.
        X_grad = error.dot(theta) + Lambda * X
        theta_grad = error.T.dot(X) + Lambda * theta
        return np.append(X_grad.flatten(), theta_grad.flatten(), 0)

    def CofiCostFunc(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return ``[cost, gradient]`` for the flat ``params`` vector.

        Same cost as :meth:`CofiCost` and same gradient as
        :meth:`CofiGradient`, computed together so the error term is only
        evaluated once.
        """
        X, theta = self._unpack_params(params, num_users, num_movies, num_features)
        error = self._masked_error(X, theta, Y, R)
        J = 0.5 * np.sum(error ** 2) + \
            Lambda / 2 * (np.sum(theta ** 2) + np.sum(X ** 2))
        X_grad = error.dot(theta) + Lambda * X
        theta_grad = error.T.dot(X) + Lambda * theta
        grad = np.append(X_grad.flatten(), theta_grad.flatten(), 0)
        return [J, grad]

    def ComputeNumericalGradient(self, J, theta):
        """Estimate the gradient of ``J`` at ``theta`` by central differences.

        Args:
            J: callable taking a parameter vector and returning a
                ``(loss, gradient)`` pair; only the loss is used.
            theta: 1-D parameter vector.

        Returns:
            Array shaped like ``theta`` holding the numerical gradient.
        """
        numgrad = np.zeros(np.shape(theta))
        perturb = np.zeros(np.shape(theta))
        e = 1e-4
        for p in range(theta.size):
            # Nudge one coordinate at a time, then reset it.
            perturb[p] = e
            loss1, _ = J(theta - perturb)
            loss2, _ = J(theta + perturb)
            numgrad[p] = (loss2 - loss1) / (2 * e)
            perturb[p] = 0
        return numgrad

    def LoadMovieList(self):
        """Read ``movie_ids.txt`` and return ``{0-based line index: title}``.

        Each line is expected to look like ``"<id> <title>"``; everything
        after the first space is the title. Blank lines are skipped.
        """
        movielist = {}
        counter = 0
        # Decode as text: calling str() on raw bytes would leave "b'...\\n'"
        # artefacts in every title. ISO-8859-1 can decode any byte sequence.
        # NOTE(review): the data file is presumed to be Latin-1 - confirm.
        with open('movie_ids.txt', encoding='ISO-8859-1') as fid:
            for line in fid:
                line = line.strip()
                if not line:
                    continue
                movielist[counter] = line.split(' ', 1)[1]
                counter += 1
        return movielist

    def NormalizeRatings(self, Y, R):
        """Subtract each row's mean rating, using only entries where ``R == 1``.

        Returns:
            ``[Ynorm, Ymean]`` where ``Ymean`` has shape ``(rows, 1)`` and
            ``Ynorm`` equals ``Y - Ymean`` on rated entries and 0 elsewhere.
            A row without any rating keeps mean 0 instead of becoming NaN.
        """
        m, n = np.shape(Y)
        Ymean = np.zeros((m, 1))
        Ynorm = np.zeros((m, n))
        for i in range(m):
            rated = R[i, :] == 1
            if not rated.any():
                continue
            Ymean[i] = np.mean(Y[i, rated])
            Ynorm[i, rated] = Y[i, rated] - Ymean[i]
        return [Ynorm, Ymean]
Unit test: not sure why the result is very different from the value given in the course. I will keep digging.
'''
Created on Apr 26, 2018
@author: charles
'''
import unittest
import numpy as np
from recommender import Recommender
class Test(unittest.TestCase):
    """Checks Recommender.CofiCostFunc on a small fixed problem."""

    def setUp(self):
        # Fresh recommender for every test method.
        self.r = Recommender()

    def tearDown(self):
        self.r = None

    def test_cofiCostFunc(self):
        """Cost at the fixed parameters must be within 1e-2 of 8.513."""
        num_users, num_movies, num_features = 3, 4, 2

        # Flat parameter vector 0.1, 0.2, ..., 1.4.
        params = np.arange(1, 15) / 10.

        # Sine of a 4x4 integer grid, keeping the first three columns.
        grid = np.array([[16, 2, 3, 13],
                         [5, 11, 10, 8],
                         [9, 7, 6, 12],
                         [4, 14, 15, 1]])
        Y = np.sin(grid)[:, 0:3]

        # 0/1 mask marking which entries of Y count as rated.
        mask = np.array([[1, 0, 1],
                         [1, 1, 1],
                         [0, 0, 1],
                         [1, 1, 0]])
        R = (mask > 0.5).astype(int)

        J, grad = self.r.CofiCostFunc(params, Y, R,
                                      num_users, num_movies,
                                      num_features, 0)
        print(J)
        print(grad)
        self.assertAlmostEqual(8.513, J, delta=1e-2)


if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
Integration check:
'''
Created on Apr 27, 2018
@author: charles
'''
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as opt
from recommender import Recommender
# End-to-end check of the Recommender class: cost values at the supplied
# parameters, gradient checks, then training on the full ratings data with
# one extra user and printing that user's top recommendations.


def print_ratings(ratings, titles):
    """Print every positive entry of ``ratings`` next to its movie title."""
    for i in np.flatnonzero(ratings > 0):
        print('Rated {} for {}'.format(ratings[i], titles[i]))


r = Recommender()

print('Loading movie ratings dataset.')
data = scipy.io.loadmat('ex8_movies.mat')
R_full = data['R']
Y_full = data['Y']
print('Average rating for movie 1 (Toy Story): %8.8f/5 '
      % np.mean(Y_full[0, R_full[0, :] == 1]))
plt.figure(figsize=(5, 5))
plt.imshow(Y_full)
plt.show()

data1 = scipy.io.loadmat('ex8_movieParams.mat')
X = data1['X']
theta = data1['Theta']

# Reduce the data set size so that this runs faster
num_users = 4
num_movies = 5
num_features = 3
X = X[0:num_movies, 0:num_features]
theta = theta[0:num_users, 0:num_features]
Y = Y_full[0:num_movies, 0:num_users]
R = R_full[0:num_movies, 0:num_users]
small_params = np.append(X.flatten(), theta.flatten(), 0)

J, grad = r.CofiCostFunc(small_params, Y, R, num_users, num_movies,
                         num_features, 0)
print('Cost at loaded parameters: %2.2f (this value should be about 22.22)' % J)
print('Checking Gradients (without regularization) ...')
r.CheckCostFunction()

J, grad = r.CofiCostFunc(small_params, Y, R, num_users, num_movies,
                         num_features, 1.5)
print('Cost at loaded parameters (lambda = 1.5): %2.2f '
      '(this value should be about 31.34)' % J)
print('Checking Gradients (with regularization) ...')
r.CheckCostFunction(1.5)

# The movie list is loaded once and reused for every printout below.
movielist = r.LoadMovieList()

my_ratings = np.zeros((1682, 1))
# Check the file movie_idx.txt for id of each movie in our dataset
# For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
my_ratings[0] = 4
# Or suppose did not enjoy Silence of the Lambs (1991), you can set
my_ratings[97] = 2
# rate other movies
my_ratings[6] = 3
my_ratings[11] = 5
my_ratings[53] = 4
my_ratings[63] = 5
my_ratings[65] = 3
my_ratings[68] = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354] = 5

print('New user ratings:')
print_ratings(my_ratings, movielist)

print('Training collaborative filtering...')
# Prepend the new user as column 0 of the full (unsliced) ratings data.
Y = np.append(my_ratings, Y_full, 1)
R = np.append((my_ratings != 0) + 0, R_full, 1)
Ynorm, Ymean = r.NormalizeRatings(Y, R)

num_users = np.shape(Y)[1]
num_movies = np.shape(Y)[0]
num_features = 10
X = np.random.randn(num_movies, num_features)
theta = np.random.randn(num_users, num_features)
initial_parameters = np.append(X.flatten(), theta.flatten(), 0)
Lambda = 10

# Fit the mean-normalised ratings: the predictions below add Ymean back,
# which is only consistent when the model was trained on Ynorm, not raw Y.
result = opt.minimize(fun=r.CofiCost, x0=initial_parameters,
                      args=(Ynorm, R, num_users, num_movies, num_features, Lambda),
                      method='CG', jac=r.CofiGradient,
                      options={'maxiter': 100})
learned = result.x

# Unfold the flat solution back into the X and theta matrices
X = learned[0:num_movies * num_features].reshape(num_movies, num_features)
theta = learned[num_movies * num_features:].reshape(num_users, num_features)
print('Recommender system learning completed.')

# Column 0 belongs to the user added above; restore the per-movie mean.
p = X.dot(theta.T)
my_predictions = p[:, 0] + Ymean.flatten()

# Movie indices ordered from highest to lowest predicted rating.
ix = np.argsort(my_predictions, kind='mergesort')[::-1]
print('Top recommendations for you:')
for j in ix[:10]:
    print('Predicting rating %1.1f for movie %s' % (my_predictions[j],
                                                    movielist[j]))

print('Original ratings provided:')
print_ratings(my_ratings, movielist)
New Comment