xlrd解析excelドキュメント
11900 ワード
pythonはexcelテーブルを解析し、xmlファイルを作成します.もちろん、このファイルは特定のフォーマットを持っています.仕事で使われているクラスを例にとると、比較的簡単です.現在、私は単一のインスタンスの生成方法しか実現していません.マルチレベルのインスタンスはしていません.後で詳しく考えてみると、ここでは2つのファイルが生成されます.1つはxmlで、1つはテキストです.テキストファイルには、各インスタンスのサブプロパティが記録されています~行メソッドでファイルに書き込まれます~!xlrdのpythonライブラリを使っていますが、機能が強いのもpythonでこの問題を解決した理由です.
#!/usr/bin/python
import sys
import xlrd
''' Define the necessary database '''
DataTypeValue={'string':'String','unsignedInt':'Unsigned','Boolean':'Bool','Int':'Int','dateTime':'dateTime'}
AccessTypeValue={'r':'readOnly','w':'readWrite'}
NOTIFICATION="none"
NEEDREBOOT="no"
ACCESSLIST="subscriber"
ACSSPECIAL=""
StyleValue={'w':'status','r':'configuration'}
DEFAULT_VALUE_RANGE={'String':'0-255','Unsigned':'0-4294967295','Int':'-32768-32767','Bool':'0-1','dateTime':'0-255'}
DEFAULT_BOOL_VALUE='0'
DEFAULT_STRING_VALUE=''
MAX_UNINT_VALUE='4294967295'
MAX_INT_VALUE='32767' #Assumed to be 32767 here temporary
CELL_VALUE_TYPE=1
CELL_RW_TYPE=2
class Node():
def __init__(self,name,type,acsspecial,nextlevelnum):
self.name=name
self.type="Object"
self.acsspecial=ACSSPECIAL
self.nextlevelnum=nextlevelnum
class Attribute():
def __init__(self,name,type,access,notification,needReboot,accessList,style,valueRange,acsSpecial,defaultValue):
self.name=name
self.type=type
self.access=access
self.notification=NOTIFICATION #default to none
self.needReboot=NEEDREBOOT #default to no
self.accessList=ACCESSLIST
self.style=style
self.valueRange=valueRange
self.acsSpecial=ACSSPECIAL #default to null
self.defaultValue=defaultValue
class NodeLevelInfo():
def __init__(self,name,nextlevelnum):
self.name=name
self.nextlevelnum=nextlevelnum
'''Just for debugging'''
def print_node_info(in_para):
print "Node name:",in_para.name,
print "acsSpecial:",in_para.acsspecial
print "NextLevelNum:",in_para.nextlevelnum
print "-----------------------------------------"
def print_attr_info(in_para):
print "Member name:",in_para.name
print "DataType:",in_para.type
print "accessType:",in_para.access
print "Notification:",in_para.notification
print "NeedReboot:",in_para.needReboot
print "AccessList:",in_para.accessList
print "Style:",in_para.style
print "ValueRange:",in_para.valueRange
print "acsSpecial:",in_para.acsSpecial
print "DefaultValue:",in_para.defaultValue
print "-----------------------------------------"
def NodeToString(data):
return ''+'
'
def NodeTailString(node_name):
return ''+node_name+' '+ '>'+'
'
def AttrToString(data):
string_out = ''+'
'
return string_out
def LevelInfoToString(data):
return 'Node:'+data.name+', '+'NextLevelNum:'+repr(data.nextlevelnum)+'
'
def write_data_file(data,filename, mode):
if len(filename)==0 or mode=='r':
return
fobj = open(filename, mode)
fobj.writelines(data)
#fobj.write(data)
fobj.close()
def retrieve_attribute_instance(row_index, sh):
#print "Retrieve attribute data"
#VALID_COLUM=3 #We just need the pre-three colums,actually.
#print "ValueType:%s,R/W:%s" % (sh.cell_value(i,CELL_VALUE_TYPE),sh.cell_value(i,CELL_RW_TYPE))
name=TMP_NODE_BASENAME+str.strip()
#type=''access='',style='',valueRange='',defaultValue=''
#get value type.
str_valuetype=sh.cell_value(row_index,CELL_VALUE_TYPE)
if 'string'in str_valuetype:
type=DataTypeValue['string']
elif 'unsignedInt' in str_valuetype:
type=DataTypeValue['unsignedInt']
elif 'Boolean' in str_valuetype:
type=DataTypeValue['Boolean']
elif 'Int' in str_valuetype:
type=DataTypeValue['Int']
elif 'dateTime' in str_valuetype:
type=DataTypeValue['dateTime']
#get valueRange
start_index=-1
colon_index=-1
end_index=-1
if '[' in str_valuetype:
start_index=str_valuetype.find('[')
end_index=str_valuetype.find(']')
if '(' in str_valuetype:
start_index=str_valuetype.find('(')
end_index=str_valuetype.find(')')
if ':' in str_valuetype:
colon_index=str_valuetype.find(':')
if start_index==-1 or end_index==-1:
#No specified valueRange,set to default.
#print name
valueRange=DEFAULT_VALUE_RANGE[type]
else:
if colon_index == 0 or colon_index == -1:
#No colon tag found,just return the value
#print 'name:%s,valueType:%s' % (name,str_valuetype)
#print 'start_index:%d,end_index:%d,str:%s' % (start_index, end_index, str_valuetype[start_index+1:end_index])
valueRange='0'+'-'+str_valuetype[start_index+1:end_index]
else:
#colon tag is found,need to reallocate the range value.
if len(str_valuetype[start_index+1:colon_index])==0:
valueRange='0'+'-'+str_valuetype[colon_index+1:end_index]
elif len(str_valuetype[colon_index+1:end_index])==0:
if type==DataTypeValue['unsignedInt']:
valueRange=str_valuetype[start_index+1:colon_index]+'-'+MAX_UNINT_VALUE
elif type==DataTypeValue['Int']:
valueRange=str_valuetype[start_index+1:colon_index]+'-'+MAX_INT_VALUE
else:
valueRange=str_valuetype[start_index+1:colon_index]+'-'+str_valuetype[colon_index+1:end_index]
#get r/w type
str_valuetype=sh.cell_value(row_index,CELL_RW_TYPE)
if 'r' == str_valuetype:
access=AccessTypeValue['r']
else:
access=AccessTypeValue['w']
#get style.
style=StyleValue[str_valuetype]
#get defaultvalue.
if type== DataTypeValue['string'] or type==DataTypeValue['dateTime']:
defaultValue=DEFAULT_STRING_VALUE
elif type==DataTypeValue['unsignedInt'] or type==DataTypeValue['Int']:
defaultValue='0'
elif type==DataTypeValue['Boolean']:
defaultValue=DEFAULT_BOOL_VALUE
return Attribute(name,type,access,NOTIFICATION,NEEDREBOOT,ACCESSLIST,style,valueRange,ACSSPECIAL,defaultValue)
#att=attribute("test","Unsigned","readOnly","none","","","status","0-4294967295","","0")
#print_attr_info(att)
#tmp_data=AttrToString(att)
#print tmp_data
#write_data_file(tmp_data,'configuration.xml','a')
#Some initial value definition
CONFIG_XML_FILE='configuration.xml' #The dest xml file
NEXT_LEVEL_NUM_FILE='next_level_info.txt'
TMP_NODE_BASENAME="" #Record the father node's name
BRANCH_NODE_NEXT_LEVEL_NUM=0 #Record the nextlevel num
ROOT_NEXT_LEVEL_NUM=0
ROOT_NODE_NAME='InternetGatewayDevice.'
ROOT_TAIL_TAG='
'
ROOT_HEAD_TAG='
'
#print ROOT_HEAD_TAG
#Load excel file for processing
fname = "sample.xls"
bk = xlrd.open_workbook(fname)
shxrange = range(bk.nsheets)
try:
sh = bk.sheet_by_name("Sheet1")
except:
print "no sheet in %s named Sheet1" % fname
sys.exit(1)
nrows = sh.nrows
ncols = sh.ncols
print "total rows: %d, total cols: %d" % (nrows,ncols)
cell_value = sh.cell_value(2,0)
#print "The 1st column title: %s" % (cell_value)
#print "type:",sh.cell_type(3,0)
row_list = []
for i in range(0,nrows):
row_data = sh.row_values(i)
#print sh.row_len(i)
row_list.append(row_data)
for i in range(0,nrows):
str = sh.cell_value(i,0)
if str.startswith(ROOT_NODE_NAME):
start_index = i
#print 'start_index:',i
break
#Write root node head tag
write_data_file(ROOT_HEAD_TAG,CONFIG_XML_FILE,'a')
for i in range(start_index,nrows):
#print "%s,len:%d" % (sh.cell_value(i,0),len(sh.cell_value(i,0)))
#str = sh.cell_value(i,0)
#len_str = len(sh.cell_value(i,0))
str = sh.cell_value(i,0)
#print "i:%d,%s" % (i, str)
if len(str) == 0:
continue
#print str
if str[-1:] == '.':
#dump a node to file
#node_data=node(str,"Object",ACSSPECIAL,0)
if str != ROOT_NODE_NAME:
ROOT_NEXT_LEVEL_NUM += 1
if TMP_NODE_BASENAME != str:
if len(TMP_NODE_BASENAME) > 0 and str != ROOT_NODE_NAME and TMP_NODE_BASENAME != ROOT_NODE_NAME:
#We should write xml tail tag before write a new item
tmp_data=NodeTailString(TMP_NODE_BASENAME)
write_data_file(tmp_data,CONFIG_XML_FILE,'a')
#print 'Node:%s,NextLevelNum:%d' % (TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM)
tmp_level_info = NodeLevelInfo(TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM)
tmp_data=LevelInfoToString(tmp_level_info)
write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a')
BRANCH_NODE_NEXT_LEVEL_NUM = 0
TMP_NODE_BASENAME=str
if TMP_NODE_BASENAME != ROOT_NODE_NAME:
node_data=Node(str,"Object",ACSSPECIAL,0)
tmp_data=NodeToString(node_data)
#print TMP_NODE_BASENAME
write_data_file(tmp_data,CONFIG_XML_FILE,'a')
else:
#dump attribute to file
if TMP_NODE_BASENAME == ROOT_NODE_NAME:
ROOT_NEXT_LEVEL_NUM += 1
else:
BRANCH_NODE_NEXT_LEVEL_NUM += 1
#Write attribute to file
#VALID_COLUM=3 #We just need the pre-three colums,actually.
#print TMP_NODE_BASENAME+str.strip(),
#print "ValueType:%s,R/W:%s" % (sh.cell_value(i,CELL_VALUE_TYPE),sh.cell_value(i,CELL_RW_TYPE))
tmp_attr=retrieve_attribute_instance(i,sh)
#print_attr_info(tmp_attr)
tmp_data=AttrToString(tmp_attr)
write_data_file(tmp_data,CONFIG_XML_FILE,'a')
#Has travelled to the last branch
if i == nrows -1:
#write xml tail tag.
#print 'Node:%s,NextLevelNum:%d' % (TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM)
tmp_level_info = NodeLevelInfo(TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM)
tmp_data=LevelInfoToString(tmp_level_info)
write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a')
tmp_data=NodeTailString(TMP_NODE_BASENAME)
write_data_file(tmp_data,CONFIG_XML_FILE,'a')
#Write root node tail tag
write_data_file(ROOT_TAIL_TAG,CONFIG_XML_FILE,'a')
#Write root node nextlevelnum info
tmp_level_info = NodeLevelInfo(ROOT_NODE_NAME,ROOT_NEXT_LEVEL_NUM)
tmp_data=LevelInfoToString(tmp_level_info)
write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a')
#unload sheet to free memory
bk.unload_sheet("Sheet1")
print 'File create Done!'