Charles' Website

1 2 3 4 5 6 7 8 9 10 Next »

Reinforcement learning: exploring reinforcement learning through the evolution of the Q-Learner

Create Date: August 21, 2019 at 06:00 PM

Tag: MACHINE LEARNING

Author Name: Sun, Charles

Evan Hennis @ Google user group

Environment setup

TensorFlow 2.0.0 beta1

Jupyter notebooks

Bellman's Equation

Value iteration

instead of the optimal policy we find the maximum value
value function

Policy Iteration

update the policy directly

Q-Learning

deep Q-Network

google colab

New Comment

Data analytics

Create Date: June 21, 2018 at 11:41 PM

Tag: DATA MINING

Author Name: Sun, Charles

Four areas of product development:

Engagement
growth
utility
core business

skills:

product/business sense
visualizing and communicating data effectively
applied statistics
experimentation/testing
resourceful
focused on results

technologies:

hive/hadoop
mysql/oracle
python/php
excel/R
presto, scuba (open source)

Typical questions:

what happens when everyone in the world is on a smartphone?
what should we build next?
how does seasonality affect usage?
what types of people use this feature?
why are people posting 5% more status updates this week?
should we launch this new feature?
- interpreting the data with scientific rigor
- understanding what the metrics tell us about people’s experiences
- making trade-offs with conflicting results
can we identify “bad” content by how people engage with it?

New Comment

Recommender Systems Python 3 code

Create Date: April 27, 2018 at 11:49 PM

Tag: MACHINE LEARNING

Author Name: Sun, Charles

class:

'''
Created on Apr 26, 2018

@author: charles
'''

import numpy as np

class Recommender:
    """Collaborative-filtering helpers: cost, gradients, gradient check, data utilities.

    Model parameters are exchanged as one flat vector: the row-major
    flattening of X (num_movies x num_features) followed by the row-major
    flattening of theta (num_users x num_features). Y and R are
    (num_movies x num_users); R[i, j] == 1 marks a rating that is present.
    """

    def __init__(self):
        pass

    @staticmethod
    def _unpack(params, num_users, num_movies, num_features):
        """Split the flat parameter vector into the X and theta matrices."""
        params = np.asarray(params)
        split = num_movies * num_features
        X = params[:split].reshape(num_movies, num_features)
        theta = params[split:].reshape(num_users, num_features)
        return X, theta

    @staticmethod
    def _masked_error(X, theta, Y, R):
        """Return the prediction error, zeroed wherever R is 0."""
        return X.dot(theta.T) * R - Y * R

    @staticmethod
    def _cost(X, theta, err, Lambda):
        """Return the squared-error cost plus the L2 penalty on X and theta."""
        return 1 / 2 * np.sum(err ** 2) + \
            Lambda / 2 * (np.sum(theta ** 2) + np.sum(X ** 2))

    @staticmethod
    def _gradient(X, theta, err, Lambda):
        """Return the flat gradient, laid out like the parameter vector."""
        X_grad = err.dot(theta) + Lambda * X
        theta_grad = err.T.dot(X) + Lambda * theta
        return np.append(X_grad.flatten(), theta_grad.flatten(), 0)

    def CheckCostFunction(self, Lambda=None):
        """Compare the analytical gradient with a numerical one on random data.

        Builds a small random problem (4 movies, 5 users, 3 features), prints
        both gradients side by side and their relative difference.

        Args:
            Lambda: regularization strength; ``None`` means 0.

        Returns:
            The relative difference between the two gradients.
        """
        if Lambda is None:
            Lambda = 0
        X_t = np.random.rand(4, 3)
        theta_t = np.random.rand(5, 3)

        # Drop roughly half of the individual ratings (element-wise mask), so
        # the check covers partially rated movies and not only whole rows.
        Y = X_t.dot(theta_t.T)
        Y[np.random.rand(*Y.shape) > 0.5] = 0
        R = np.zeros(Y.shape)
        R[Y != 0] = 1

        X = np.random.randn(*X_t.shape)
        theta = np.random.randn(*theta_t.shape)
        num_movies, num_users = Y.shape
        num_features = theta_t.shape[1]
        params = np.append(X.flatten(), theta.flatten(), 0)

        def J(t):
            return self.CofiCostFunc(t, Y, R, num_users, num_movies,
                                     num_features, Lambda)

        numgrad = self.ComputeNumericalGradient(J, params)
        _, grad = J(params)

        # One row per parameter: numerical value on the left, analytical on
        # the right, matching the explanation printed below.
        print(np.column_stack((numgrad, grad)))
        print('The above two columns you get should be very similar.')
        print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')
        diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
        print('If your cost function implementation is correct, then\n'
              'the relative difference will be small (less than 1e-9).\n'
              'Relative Difference: %g' % diff)
        return diff

    def CofiCost(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return only the regularized cost J for the flat parameter vector."""
        X, theta = self._unpack(params, num_users, num_movies, num_features)
        err = self._masked_error(X, theta, Y, R)
        return self._cost(X, theta, err, Lambda)

    def CofiGradient(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return only the flat gradient for the flat parameter vector.

        The gradient covers both X and theta and has the same layout as the
        ``grad`` returned by ``CofiCostFunc``.
        """
        X, theta = self._unpack(params, num_users, num_movies, num_features)
        err = self._masked_error(X, theta, Y, R)
        return self._gradient(X, theta, err, Lambda)

    def CofiCostFunc(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return ``[J, grad]``: the regularized cost and its flat gradient.

        Args:
            params: flat vector, X flattened followed by theta flattened.
            Y: ratings, num_movies x num_users.
            R: indicator matrix of the same shape as Y (1 where rated).
            num_users, num_movies, num_features: problem dimensions.
            Lambda: regularization strength.
        """
        X, theta = self._unpack(params, num_users, num_movies, num_features)
        err = self._masked_error(X, theta, Y, R)
        J = self._cost(X, theta, err, Lambda)
        grad = self._gradient(X, theta, err, Lambda)
        return [J, grad]

    def ComputeNumericalGradient(self, J, theta):
        """Estimate the gradient of J at theta by central differences.

        Args:
            J: callable returning a ``(loss, gradient)`` pair; only the loss
                is used.
            theta: point at which to estimate the gradient.

        Returns:
            Array with the same shape as ``theta``.
        """
        theta = np.asarray(theta)
        numgrad = np.zeros(theta.shape)
        perturb = np.zeros(theta.shape)
        e = 1e-4
        for p in range(theta.size):
            # Perturb one coordinate at a time, in both directions.
            perturb.flat[p] = e
            loss1, _ = J(theta - perturb)
            loss2, _ = J(theta + perturb)
            numgrad.flat[p] = (loss2 - loss1) / (2 * e)
            perturb.flat[p] = 0
        return numgrad

    def LoadMovieList(self, filename='movie_ids.txt'):
        """Read movie titles from a file with one ``<id> <title>`` per line.

        The file is decoded as ISO-8859-1 text so titles come back as clean
        strings. Blank lines are skipped.

        Args:
            filename: path of the movie list file.

        Returns:
            Dict mapping the 0-based position of each entry to its title.
        """
        movielist = {}
        with open(filename, encoding='ISO-8859-1') as fid:
            for line in fid:
                entry = line.strip()
                if not entry:
                    continue
                # Everything after the first space is the title.
                movielist[len(movielist)] = entry.partition(' ')[2].strip()
        return movielist

    def NormalizeRatings(self, Y, R):
        """Subtract each movie's mean rating, using only entries where R == 1.

        Rows without any rating get a mean of 0 instead of NaN; unrated
        entries of the normalized matrix stay 0.

        Returns:
            ``[Ynorm, Ymean]`` where Ynorm has the shape of Y and Ymean is a
            column vector with one mean per row.
        """
        Y = np.asarray(Y)
        rated = np.asarray(R) == 1
        counts = rated.sum(axis=1, keepdims=True)
        totals = np.where(rated, Y, 0).sum(axis=1, keepdims=True, dtype=float)
        # Divide only where there is at least one rating; other rows keep 0.
        Ymean = np.divide(totals, counts, out=np.zeros(totals.shape),
                          where=counts > 0)
        Ynorm = np.where(rated, Y - Ymean, 0.0)
        return [Ynorm, Ymean]

Unit test: not sure why the result is very different compared to the course. I will keep digging

'''
Created on Apr 26, 2018

@author: charles
'''
import unittest
import numpy as np
from recommender import Recommender

class Test(unittest.TestCase):
    """Regression test for the collaborative-filtering cost function."""

    def setUp(self):
        # A fresh recommender for every test case.
        self.r = Recommender()

    def tearDown(self):
        self.r = None

    def test_cofiCostFunc(self):
        """Unregularized cost on a small fixed problem should be about 8.513."""
        num_users, num_movies, num_features = 3, 4, 2

        # 14 parameters: 4x2 movie features followed by 3x2 user weights.
        params = np.arange(1, 15) / 10.
        grid = np.array([[16, 2, 3, 13],
                         [5, 11, 10, 8],
                         [9, 7, 6, 12],
                         [4, 14, 15, 1]])
        Y = np.sin(grid)[:, 0:3]
        rated = np.array([[1, 0, 1],
                          [1, 1, 1],
                          [0, 0, 1],
                          [1, 1, 0]])
        R = (rated > 0.5).astype(int)

        J, grad = self.r.CofiCostFunc(params, Y, R, num_users, num_movies,
                                      num_features, 0)
        print(J)
        print(grad)
        self.assertAlmostEqual(8.513, J, delta=1e-2)


if __name__ == '__main__':
    # To run a single test, set sys.argv = ['', 'Test.testName'] first.
    unittest.main()

Integration check:

'''
Created on Apr 27, 2018

@author: charles
'''
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as opt
from recommender import Recommender

# End-to-end check of the Recommender class on the movie ratings data:
# cost sanity checks, gradient checks, then training and recommendations.
r = Recommender()

# ---- Load and visualize the ratings -------------------------------------
print('Loading movie ratings dataset.')
data = scipy.io.loadmat('ex8_movies.mat')
R = data['R']
Y = data['Y']
print('Average rating for movie 1 (Toy Story): %8.8f/5 '
      % np.mean(Y[0, R[0, :] == 1]))

plt.figure(figsize=(5, 5))
plt.imshow(Y)
plt.show()

# ---- Cost at pre-trained parameters, on a reduced problem ---------------
data1 = scipy.io.loadmat('ex8_movieParams.mat')
# Reduce the data set size so that this runs faster
num_users = 4
num_movies = 5
num_features = 3

X_small = data1['X'][0:num_movies, 0:num_features]
theta_small = data1['Theta'][0:num_users, 0:num_features]
Y_small = Y[0:num_movies, 0:num_users]
R_small = R[0:num_movies, 0:num_users]
small_params = np.append(X_small.flatten(), theta_small.flatten(), 0)

J, grad = r.CofiCostFunc(small_params, Y_small, R_small,
                         num_users, num_movies, num_features, 0)
print('Cost at loaded parameters: %2.2f (this value should be about 22.22)' % J)

print('Checking Gradients (without regularization) ...')
r.CheckCostFunction()

J, grad = r.CofiCostFunc(small_params, Y_small, R_small,
                         num_users, num_movies, num_features, 1.5)
# Adjacent literals are joined by the parser, so no source indentation
# leaks into the printed message.
print('Cost at loaded parameters (lambda = 1.5): %2.2f '
      '(this value should be about 31.34)' % J)

print('Checking Gradients (with regularization) ...')
r.CheckCostFunction(1.5)

# ---- Ratings for a new user ---------------------------------------------
movielist = r.LoadMovieList()
my_ratings = np.zeros((1682, 1))

# Check the file movie_ids.txt for the id of each movie in our dataset.
# For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
my_ratings[0] = 4

# Or suppose did not enjoy Silence of the Lambs (1991), you can set
my_ratings[97] = 2
# rate other movies
my_ratings[6] = 3
my_ratings[11] = 5
my_ratings[53] = 4
my_ratings[63] = 5
my_ratings[65] = 3
my_ratings[68] = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354] = 5

rated_ids = np.flatnonzero(my_ratings > 0)

print('New user ratings:')
for i in rated_ids:
    print('Rated {} for {}'.format(int(my_ratings[i, 0]), movielist[i]))

# ---- Train collaborative filtering --------------------------------------
print('Training collaborative filtering...')
# The new user becomes column 0 of the full (unsliced) rating matrices.
Y = np.append(my_ratings, Y, 1)
R = np.append((my_ratings != 0) + 0, R, 1)

Ynorm, Ymean = r.NormalizeRatings(Y, R)
num_users = np.shape(Y)[1]
num_movies = np.shape(Y)[0]
num_features = 10

X = np.random.randn(num_movies, num_features)
theta = np.random.randn(num_users, num_features)

initial_parameters = np.append(X.flatten(), theta.flatten(), 0)
Lambda = 10

# Train on the mean-normalized ratings: Ymean is added back to the
# predictions below, so fitting the raw Y would count the mean twice.
# jac=True tells scipy that the objective returns (cost, gradient), so one
# call to CofiCostFunc supplies both.
result = opt.minimize(fun=r.CofiCostFunc, x0=initial_parameters,
                      args=(Ynorm, R, num_users, num_movies,
                            num_features, Lambda),
                      method='CG', jac=True,
                      options={'maxiter': 100})

# Unfold the learned flat vector back into X and theta
learned = result.x
X = learned[0:num_movies * num_features].reshape(num_movies, num_features)
theta = learned[num_movies * num_features:].reshape(num_users, num_features)

print('Recommender system learning completed.')

# ---- Recommendations for the new user (column 0) ------------------------
p = X.dot(theta.T)
my_predictions = p[:, 0] + Ymean.flatten()

# Stable sort, then reversed, to list the highest predictions first.
ix = np.argsort(my_predictions, kind='mergesort')[::-1]

print('Top recommendations for you:')
for j in ix[:10]:
    print('Predicting rating %1.1f for movie %s' % (my_predictions[j],
                                                     movielist[j]))

print('Original ratings provided:')
for i in rated_ids:
    print('Rated {} for {}'.format(int(my_ratings[i, 0]), movielist[i]))

New Comment

Qi Sun (Charles) PhD

Reinforcement learning: exploring reinforcement learning through the evolution of the Q-Learner

Data analytics

Recommender Systems Python 3 code