台大hw 1-予測pm 25-手動でgradient descentを実現

25713 ワード

Homework 1 - PM2.5 Prediction
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib as mpl

#             :
# 9+1=10  feature,9     pm2.5,bias
#   train data
#     pm25       list 

#%%
def train():
    all_pm25 = []
    with open("train.csv",encoding="big5") as f:
        for line in f:
            if "PM2.5" in line:
                temp = line.strip().split(",",3)
                pm25 = temp[3].split(',')
                pm25 = [float(i) for i in pm25]
                all_pm25 += pm25
    #   features label
    X = []
    Y = []
    # method 1:
    # for i in range(len(all_pm25)-10):
    #     X.append(all_pm25[i:i+9]) #    9 data  feature
    #     y.append(all_pm25[i+9]) #    data  label
    # method 2:          
    for i in range(0, len(all_pm25), 10):
        X.append(all_pm25[i:i+9])
        Y.append(all_pm25[i+9])
    return X,Y

def test():
    X = []
    with open("test.csv",encoding="big5") as f:
        for line in f:
            if "PM2.5" in line:
                temp = line.strip().split(",",2)
                pm25 = temp[2].split(',')
                pm25 = [float(i) for i in pm25]
                X.append(pm25)
    return X

X,Y = train()
X_test = test()


def loss(w,b):
    loss = 0
    for x,y in zip(X,Y):
        sum_wi_xi = np.dot(w,x)
        one_loss = (sum_wi_xi + b - y)**2
        loss = loss + one_loss
    return loss

#%%
def sgd(iteration):
    """sgd      adagrad
    """
    # random pick initial parameters
    w = np.ones(9)  # [1,1,1,...]
    b = 1           # 1
    lr = 0.00001

    w_history = [w]
    b_history = [b]
    loss_history = []

    print("iter:",iteration)
    for i in range(iteration):
        for x,y in zip(X,Y):
            sum_wi_xi = np.dot(w,x)
            w_grad = 2*(sum_wi_xi + b - y) * np.array(x)
            w = w - np.multiply(w_grad, lr)

            b_grad = 2*(sum_wi_xi + b - y)
            b = b - b_grad * lr

        w_history.append(w)
        b_history.append(b)
        current_loss = loss(w,b)
        loss_history.append(current_loss)
    
    plt.plot(np.arange(len(loss_history)),loss_history, "o")
    plt.show()
    return (w_history, b_history, loss_history)



for i in [100,130]:
    w_hist_sgd, b_hist_sgd, loss_hist_sgd = sgd(i)
    plt.plot(np.arange(len(loss_hist_sgd[-20:])),loss_hist_sgd[-20:], "o")
    plt.show()


def sgd_predict(w, b, X):
    """
    :input: [[x1, x2, ..., x9],
             [x1, x2, ..., x9]]
    :output: [y, y]
    """
    y_hat = []
    for x in X:
        predict = np.dot(w, x) + b
        y_hat.append(predict)
    return y_hat

w_hat = w_hist_sgd[-1]
b_hat = b_hist_sgd[-1]
#             test      
sgd_predict(w_hat,b_hat,X_test)


#%%
def adagrad(iteration):
    w = np.ones(9)  # [1,1,1,...]
    b = 1           # 1
    lr = 0.000001

    w_history = [w]
    b_history = [b]
    loss_history = []
    
    print("iter:",iteration)
    for i in range(iteration):
        w_grad = np.zeros(9)
        b_grad = 0
        for x,y in zip(X,Y):
            sum_wi_xi = np.dot(w,x)
            w_grad_one = 2*(sum_wi_xi + b - y) * np.array(x)
            b_grad_one = 2*(sum_wi_xi + b - y)
            w_grad = w_grad_one + w_grad
            b_grad = b_grad_one + b_grad
            
        lr_w = lr/np.sqrt(np.sum(np.array(w_history)**2, axis=0))
        lr_b = lr/np.sqrt(np.sum(np.array(b_history)**2))
        w = w - np.multiply(w_grad, lr_w)
        b = b - b_grad * lr_b

        w_history.append(w)
        b_history.append(b)
        current_loss = loss(w,b)
        loss_history.append(current_loss)
    
    plt.plot(np.arange(len(loss_history)),loss_history, "o")
    plt.show()
    return (w_history, b_history, loss_history)

for i in [1000]:
    w_hist, b_hist, loss_hist = adagrad(i)
    plt.plot(np.arange(len(loss_hist[-40:])),loss_hist[-40:], "o")
    plt.show()