# Python basics notes (2)
# 3092 words
import numpy as np
import csv
#
def get_alldata(filename):
    """Load the four review columns used by this script from a CSV file.

    The header row is parsed with the ``csv`` module, and the columns
    ``Clothing ID``, ``Recommended IND``, ``Positive Feedback Count`` and
    ``Class Name`` are extracted in exactly that order, wherever they sit in
    the file. The fixed order matters because the other functions in this
    file index rows positionally (0 = ID, 1 = recommended flag,
    2 = positive feedback count, 3 = class name).

    Args:
        filename: Path of the CSV file; its first row must be the header.

    Returns:
        A NumPy array with one row per data record and the four columns
        listed above, every value kept as the string read from the file.
        A completely empty file yields an empty array.

    Raises:
        ValueError: If the header lacks any of the required columns.
        IndexError: If a non-blank data row is too short to contain one of
            the selected columns.
    """
    wanted_cols = (
        "Clothing ID",
        "Recommended IND",
        "Positive Feedback Count",
        "Class Name",
    )
    # newline='' lets the csv module deal with line endings itself, so the
    # header no longer needs its last character sliced off by hand.
    with open(filename, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            # Empty file: nothing to select.
            return np.array([])

        missing = [name for name in wanted_cols if name not in header]
        if missing:
            # Silently dropping a column would shift every later column and
            # corrupt the positional lookups done by the callers.
            raise ValueError(
                "CSV header is missing required column(s): {}".format(
                    ", ".join(missing)
                )
            )
        col_indexes = [header.index(name) for name in wanted_cols]

        # csv.reader returns [] for blank lines; skip them.
        dataset = [[row[i] for i in col_indexes] for row in reader if row]
    return np.array(dataset)
def get_id_count_arr(dataset, min_count=400):
    """Return the Clothing IDs that occur in at least ``min_count`` rows.

    Args:
        dataset: Iterable of rows whose first element is the Clothing ID.
        min_count: Minimum number of rows an ID must have to be kept.
            Defaults to 400.

    Returns:
        A NumPy array of the qualifying IDs, in order of first appearance
        in ``dataset``. Empty when no ID reaches the threshold.
    """
    counts = {}
    for row in dataset:
        # .get(..., 0) + 1 makes the first occurrence count as 1, so an ID
        # with exactly ``min_count`` rows is kept.
        counts[row[0]] = counts.get(row[0], 0) + 1
    return np.array([cid for cid, seen in counts.items() if seen >= min_count])
def cal_recom_num(dataset, id_lst):
    """Compute, for each ID in ``id_lst``, the share of its rows flagged '1'.

    Rows are read positionally: element 0 is the ID and element 1 is the
    recommended flag, which is compared against the string ``'1'``.

    Args:
        dataset: Iterable of rows (NumPy 2-D array or list of sequences).
        id_lst: Sequence of IDs to report on.

    Returns:
        A list of floats aligned with ``id_lst``: recommended rows divided
        by total rows for that ID. An ID with no rows in ``dataset`` gets
        0.0 instead of raising ``ZeroDivisionError``.
    """
    wanted = set(id_lst)
    totals = {}
    recommended = {}
    # One pass over the data instead of rescanning every row for every ID.
    for row in dataset:
        cid = row[0]
        if cid not in wanted:
            continue
        totals[cid] = totals.get(cid, 0) + 1
        if row[1] == '1':
            recommended[cid] = recommended.get(cid, 0) + 1

    id_recom_ratio_lst = []
    for cid in id_lst:
        total = totals.get(cid, 0)
        ratio = recommended.get(cid, 0) / total if total else 0.0
        id_recom_ratio_lst.append(ratio)
    return id_recom_ratio_lst
def cal_pos_num(dataset, id_lst):
    """Sum the positive-feedback counts and pick a class name for each ID.

    Rows are read positionally: element 0 is the ID, element 2 the positive
    feedback count (converted with ``int``) and element 3 the class name.

    Args:
        dataset: Iterable of rows (NumPy 2-D array or list of sequences).
        id_lst: Sequence of IDs to report on.

    Returns:
        A tuple ``(id_pos_sum_lst, id_name_lst)``, both aligned with
        ``id_lst``. The first holds the summed feedback count per ID. The
        second holds the class name from the last row seen for that ID.
        An ID with no rows gets a sum of 0 and an empty name, rather than
        inheriting the previous ID's name.

    Raises:
        ValueError: If the feedback count of a row belonging to one of the
            requested IDs cannot be converted to ``int``.
    """
    wanted = set(id_lst)
    pos_totals = {}
    class_names = {}
    # One pass over the data. Only rows of requested IDs are converted, so
    # malformed counts on unrelated rows are never touched.
    for row in dataset:
        cid = row[0]
        if cid not in wanted:
            continue
        pos_totals[cid] = pos_totals.get(cid, 0) + int(row[2])
        class_names[cid] = row[3]  # last matching row wins

    id_pos_sum_lst = [pos_totals.get(cid, 0) for cid in id_lst]
    id_name_lst = [class_names.get(cid, '') for cid in id_lst]
    return id_pos_sum_lst, id_name_lst
if __name__ == "__main__":
    # Load the selected review columns from the CSV file.
    filename = "Z:\\womens_clothing_e-commerce_reviews.csv"
    dataset = get_alldata(filename)
    print(" dataset : {}".format(dataset.shape))

    # IDs returned by the review-count filter.
    id_count_lst = get_id_count_arr(dataset)
    print(" 400 Clothing ID {} , {}".format(len(id_count_lst), id_count_lst))

    # Per-ID statistics, each aligned with id_count_lst.
    recom_ratio_lst = cal_recom_num(dataset, id_count_lst)
    id_pos_sum_lst, id_name_lst = cal_pos_num(dataset, id_count_lst)

    # Stack the four sequences and transpose so each row describes one ID.
    columns = (id_count_lst, id_name_lst, recom_ratio_lst, id_pos_sum_lst)
    id_data_arrs = np.array(columns).T

    report_line = "Clothing ID {} , {}, : {}, : {}"
    for id_data in id_data_arrs:
        print(report_line.format(id_data[0], id_data[1], id_data[2], id_data[3]))