Pythonでxlsxファイルを読み込む方法
スクリプトは以下の通りです
あるエクセルにはある人の通話記録が記録されています。以下のプログラムは通話場所、通話タイプなどによって通話時間を統計します。
from openpyxl import load_workbook
workbook = load_workbook(u'/tmp/test.xlsx') # xlsx
booksheet = workbook.active # sheet, sheet
# sheet , sheet , 。
# sheets = workbook.get_sheet_names() # sheet
# booksheet = workbook.get_sheet_by_name(sheets[0])
# sheet
rows = booksheet.rows
# sheet
columns = booksheet.columns
i = 0
#
for row in rows:
i = i + 1
line = [col.value for col in row]
cell_data_1 = booksheet.cell(row=i, column=3).value # i 1
cell_data_2 = booksheet.cell(row=i, column=4).value # i 2
cell_data_3 = booksheet.cell(row=i, column=8).value # i 3
cell_data_4 = booksheet.cell(row=i, column=18).value # i 4
print (cell_data_1, cell_data_2, cell_data_3, cell_data_4)
例:pythonでエクセルデータを読み込み、分類統計を行う。あるエクセルにはある人の通話記録が記録されています。以下のプログラムは通話場所、通話タイプなどによって通話時間を統計します。
# -*- coding:utf-8 -*-
import xlrd
import datetime
infos=[]
info_file=xlrd.open_workbook('src.xls')# excel
info_sheet=info_file.sheets()[0]#
row_count=info_sheet.nrows# , ncols
for row in range(1,row_count):
time_string=info_sheet.cell(row,3).value
time_s_sp=time_string.split(':')
infos.append(# row_count
{
'type':info_sheet.cell(row,2).value,# , ,
'other_cellphone':info_sheet.cell(row,0).value,# ,
'timespan':datetime.timedelta(seconds=int(time_s_sp[2]),minutes=int(time_s_sp[1]),hours=int(time_s_sp[0])),
'gpscity':info_sheet.cell(row,5).value#
}
)
time_all=datetime.timedelta(seconds=0)#
time_types={}
time_classes={}
time_numbers={}
for infor in infos:#
time_all +=infor['timespan']#
infor_type=infor['type']
if infor_type in time_types:
time_types[infor_type]+=infor['timespan']
else:
time_types[infor_type]=infor['timespan']#
infor_class=infor['gpscity']
if infor_class in time_classes:
time_classes[infor_class]+=infor['timespan']
else:
time_classes[infor_class]=infor['timespan']# ++,
infor_number=infor['other_cellphone']
if infor_number in time_numbers:
time_numbers[infor_number]+=infor['timespan']
else:
time_numbers[infor_number]=infor['timespan']#
print ' :%s' % time_all
print
print ' '
for k,v in time_types.items():
print k.encode('utf-8'),v
print
print ' :'
for k,v in time_classes.items():
print k.encode('utf-8'),v
print
print ' :'
for k,v in time_numbers.items():
print k,v
コードを最適化する
# -*- coding:utf-8 -*-
import xlrd
from datetime import timedelta
def read_excel(file_excel):# excel
infos=[]
info_file=xlrd.open_workbook(file_excel)
info_sheet=info_file.sheets()[0]
row_count=info_sheet.nrows
for row in range(1,row_count):
time_string=info_sheet.cell(row,3).value
time_s_sp=time_string.split(':')
infos.append(
{
'type':info_sheet.cell(row,2).value,
'other_cellphone':info_sheet.cell(row,0).value,
'timespan':timedelta(seconds=int(time_s_sp[2]),minutes=int(time_s_sp[1]),hours=int(time_s_sp[0])),
'gpscity':info_sheet.cell(row,5).value
}
)
return infos
def count_cell(list_dirs,infotype):# ,
result_dir={}
time_all=timedelta(seconds=0)
for list_dir in list_dirs:
time_all +=list_dir['timespan']
info_type = list_dir[infotype]
if info_type not in result_dir:
result_dir[info_type]=list_dir['timespan']
else:
result_dir[info_type]+=list_dir['timespan']
return time_all,result_dir
def print_result(result_dir):#
for k,v in result_dir.items():
print k.encode('utf-8'),v
if __name__=="__main__":
list_dirs=read_excel('src.xls')
time_all,result_type=count_cell(list_dirs,'type')
result_cell=count_cell(list_dirs,'other_cellphone')
result_gpscity = count_cell(list_dirs, 'gpscity')
print ' :%s' % time_all
print ' :'
print_result(result_type)
print ' :'
print_result(result_cell[1])
print ' :'
print_result(result_gpscity[1])
以上が本文の全部です。皆さんの勉強に役に立つように、私たちを応援してください。