Pythonベースノート(2)

3092 ワード

import numpy as np
import csv


#        


def get_alldata(filename):
    """Load the analysis columns of a review CSV file into a NumPy array.

    The first record of the file is treated as the header. The columns
    "Clothing ID", "Recommended IND", "Positive Feedback Count" and
    "Class Name" are extracted, always in that order, so callers can rely on
    fixed positions 0-3 regardless of the column order in the file.
    Requested columns that the header does not contain are left out.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A NumPy array built from one list of field strings per non-blank
        data record; an empty array when the file has no data records.

    Raises:
        IndexError: If a non-blank data record is shorter than the header
            position of one of the extracted columns.
    """
    wanted_columns = (
        "Clothing ID",
        "Recommended IND",
        "Positive Feedback Count",
        "Class Name",
    )
    # newline="" lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(filename, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        # Parse the header with the csv reader as well, so quoted names and a
        # header line without a trailing newline are handled correctly.
        header = next(reader, [])
        col_indexes = [
            header.index(name) for name in wanted_columns if name in header
        ]
        # csv.reader yields [] for blank lines; skip them instead of failing.
        rows = [[row[i] for i in col_indexes] for row in reader if row]
    return np.array(rows)

def get_id_count_arr(dataset, min_count=400):
    """Return the IDs that occur in at least ``min_count`` rows.

    The ID of a row is its first element (``row[0]``).

    Args:
        dataset: Iterable of rows.
        min_count: Minimum number of rows an ID must have to be kept.
            Defaults to 400.

    Returns:
        A NumPy array of the qualifying IDs, in order of first appearance
        in ``dataset``.
    """
    occurrences = {}
    for row in dataset:
        key = row[0]
        # Start from 0 and add 1 so the first sighting counts as one row.
        occurrences[key] = occurrences.get(key, 0) + 1
    frequent_ids = [
        key for key, count in occurrences.items() if count >= min_count
    ]
    return np.array(frequent_ids)


def cal_recom_num(dataset, id_lst):
    """Compute, for each ID, the fraction of its rows that are recommended.

    A row belongs to an ID when its first element equals that ID, and it
    counts as recommended when its second element equals the string '1'.

    Args:
        dataset: Iterable of rows (ID at position 0, flag at position 1).
        id_lst: Sequence of IDs to report on.

    Returns:
        A list of floats aligned with ``id_lst``. An ID that has no rows in
        ``dataset`` gets 0.0 instead of triggering a division by zero.
    """
    wanted = set(id_lst)
    # One pass over the data: per ID keep [total rows, recommended rows].
    tallies = {}
    for row in dataset:
        key = row[0]
        if key not in wanted:
            continue
        tally = tallies.setdefault(key, [0, 0])
        tally[0] += 1
        if row[1] == '1':
            tally[1] += 1

    id_recom_ratio_lst = []
    for key in id_lst:
        total, recommended = tallies.get(key, (0, 0))
        id_recom_ratio_lst.append(recommended / total if total else 0.0)
    return id_recom_ratio_lst


def cal_pos_num(dataset, id_lst):
    """Sum the positive-feedback counts and pick the class name for each ID.

    A row belongs to an ID when its first element equals that ID. The third
    element of the row is converted with ``int`` and added to the ID's total;
    the fourth element is taken as the ID's class name (the last matching row
    wins).

    Args:
        dataset: Iterable of rows (ID at 0, count at 2, class name at 3).
        id_lst: Sequence of IDs to report on.

    Returns:
        A tuple ``(id_pos_sum_lst, id_name_lst)`` of two lists aligned with
        ``id_lst``. An ID with no rows in ``dataset`` gets a total of 0 and
        an empty class name rather than the previous ID's name.

    Raises:
        ValueError: If the count of a matching row is not an integer string.
    """
    wanted = set(id_lst)
    totals = {}
    class_names = {}
    # Single pass; only rows of requested IDs are converted, so malformed
    # counts on unrelated rows are never touched.
    for row in dataset:
        key = row[0]
        if key not in wanted:
            continue
        totals[key] = totals.get(key, 0) + int(row[2])
        class_names[key] = row[3]

    id_pos_sum_lst = [totals.get(key, 0) for key in id_lst]
    id_name_lst = [class_names.get(key, '') for key in id_lst]
    return id_pos_sum_lst, id_name_lst


if __name__ == "__main__":
    # Script entry point: load the review data, select the frequently
    # reviewed Clothing IDs, then print the per-ID statistics.
    filename = "Z:\\womens_clothing_e-commerce_reviews.csv"
    dataset = get_alldata(filename)
    print("   dataset    : {}".format(dataset.shape))

    id_count_lst = get_id_count_arr(dataset)
    print("      400   Clothing ID  {} ,   {}".format(len(id_count_lst), id_count_lst))

    recom_ratio_lst = cal_recom_num(dataset, id_count_lst)
    id_pos_sum_lst, id_name_lst = cal_pos_num(dataset, id_count_lst)

    # Stack the four per-ID sequences as rows, then transpose so that each
    # row of the result describes a single Clothing ID.
    per_id_columns = (id_count_lst, id_name_lst, recom_ratio_lst, id_pos_sum_lst)
    id_data_arrs = np.array(per_id_columns).T

    report_template = "Clothing ID  {} ,      {},       : {},        : {}"
    for id_data in id_data_arrs:
        print(report_template.format(*id_data[:4]))