xlrd解析excelドキュメント


pythonはexcelテーブルを解析し、xmlファイルを作成します.もちろん、このファイルは特定のフォーマットを持っています.仕事で使われているクラスを例にとると、比較的簡単です.現在、私は単一のインスタンスの生成方法しか実現していません.マルチレベルのインスタンスはしていません.後で詳しく考えてみると、ここでは2つのファイルが生成されます.1つはxmlで、1つはテキストです.テキストファイルには、各インスタンスのサブプロパティが記録されています~行メソッドでファイルに書き込まれます~!xlrdのpythonライブラリを使っていますが、機能が強いのもpythonでこの問題を解決した理由です.
#!/usr/bin/python

import sys
import xlrd

''' Define the necessary database '''
DataTypeValue={'string':'String','unsignedInt':'Unsigned','Boolean':'Bool','Int':'Int','dateTime':'dateTime'}
AccessTypeValue={'r':'readOnly','w':'readWrite'}
NOTIFICATION="none"
NEEDREBOOT="no"
ACCESSLIST="subscriber"
ACSSPECIAL=""
StyleValue={'w':'status','r':'configuration'} 
DEFAULT_VALUE_RANGE={'String':'0-255','Unsigned':'0-4294967295','Int':'-32768-32767','Bool':'0-1','dateTime':'0-255'}
DEFAULT_BOOL_VALUE='0'
DEFAULT_STRING_VALUE=''
MAX_UNINT_VALUE='4294967295'
MAX_INT_VALUE='32767' #Assumed to be 32767 here temporary
CELL_VALUE_TYPE=1
CELL_RW_TYPE=2


class Node():
    def __init__(self,name,type,acsspecial,nextlevelnum):
        self.name=name
        self.type="Object"
        self.acsspecial=ACSSPECIAL
        self.nextlevelnum=nextlevelnum

class Attribute():
    def __init__(self,name,type,access,notification,needReboot,accessList,style,valueRange,acsSpecial,defaultValue):
        self.name=name
        self.type=type
        self.access=access
        self.notification=NOTIFICATION #default to none
        self.needReboot=NEEDREBOOT      #default to no
        self.accessList=ACCESSLIST
        self.style=style
        self.valueRange=valueRange
        self.acsSpecial=ACSSPECIAL  #default to null
        self.defaultValue=defaultValue

class NodeLevelInfo():
    def __init__(self,name,nextlevelnum):
        self.name=name
        self.nextlevelnum=nextlevelnum

'''Just for debugging'''
def print_node_info(in_para):
    print "Node name:",in_para.name,
    print "acsSpecial:",in_para.acsspecial
    print "NextLevelNum:",in_para.nextlevelnum
    print "-----------------------------------------"

def print_attr_info(in_para):
    print "Member name:",in_para.name
    print "DataType:",in_para.type
    print "accessType:",in_para.access
    print "Notification:",in_para.notification
    print "NeedReboot:",in_para.needReboot
    print "AccessList:",in_para.accessList
    print "Style:",in_para.style
    print "ValueRange:",in_para.valueRange
    print "acsSpecial:",in_para.acsSpecial
    print "DefaultValue:",in_para.defaultValue
    print "-----------------------------------------"

def NodeToString(data):
    return ''+'
' def NodeTailString(node_name): return ''+node_name+' '+ '>'+'
' def AttrToString(data): string_out = ''+'
' return string_out def LevelInfoToString(data): return 'Node:'+data.name+', '+'NextLevelNum:'+repr(data.nextlevelnum)+'
' def write_data_file(data,filename, mode): if len(filename)==0 or mode=='r': return fobj = open(filename, mode) fobj.writelines(data) #fobj.write(data) fobj.close() def retrieve_attribute_instance(row_index, sh): #print "Retrieve attribute data" #VALID_COLUM=3 #We just need the pre-three colums,actually. #print "ValueType:%s,R/W:%s" % (sh.cell_value(i,CELL_VALUE_TYPE),sh.cell_value(i,CELL_RW_TYPE)) name=TMP_NODE_BASENAME+str.strip() #type=''access='',style='',valueRange='',defaultValue='' #get value type. str_valuetype=sh.cell_value(row_index,CELL_VALUE_TYPE) if 'string'in str_valuetype: type=DataTypeValue['string'] elif 'unsignedInt' in str_valuetype: type=DataTypeValue['unsignedInt'] elif 'Boolean' in str_valuetype: type=DataTypeValue['Boolean'] elif 'Int' in str_valuetype: type=DataTypeValue['Int'] elif 'dateTime' in str_valuetype: type=DataTypeValue['dateTime'] #get valueRange start_index=-1 colon_index=-1 end_index=-1 if '[' in str_valuetype: start_index=str_valuetype.find('[') end_index=str_valuetype.find(']') if '(' in str_valuetype: start_index=str_valuetype.find('(') end_index=str_valuetype.find(')') if ':' in str_valuetype: colon_index=str_valuetype.find(':') if start_index==-1 or end_index==-1: #No specified valueRange,set to default. #print name valueRange=DEFAULT_VALUE_RANGE[type] else: if colon_index == 0 or colon_index == -1: #No colon tag found,just return the value #print 'name:%s,valueType:%s' % (name,str_valuetype) #print 'start_index:%d,end_index:%d,str:%s' % (start_index, end_index, str_valuetype[start_index+1:end_index]) valueRange='0'+'-'+str_valuetype[start_index+1:end_index] else: #colon tag is found,need to reallocate the range value. if len(str_valuetype[start_index+1:colon_index])==0: valueRange='0'+'-'+str_valuetype[colon_index+1:end_index] elif len(str_valuetype[colon_index+1:end_index])==0: if type==DataTypeValue['unsignedInt']: valueRange=str_valuetype[start_index+1:colon_index]+'-'+MAX_UNINT_VALUE elif type==DataTypeValue['Int']: valueRange=str_valuetype[start_index+1:colon_index]+'-'+MAX_INT_VALUE else: valueRange=str_valuetype[start_index+1:colon_index]+'-'+str_valuetype[colon_index+1:end_index] #get r/w type str_valuetype=sh.cell_value(row_index,CELL_RW_TYPE) if 'r' == str_valuetype: access=AccessTypeValue['r'] else: access=AccessTypeValue['w'] #get style. style=StyleValue[str_valuetype] #get defaultvalue. if type== DataTypeValue['string'] or type==DataTypeValue['dateTime']: defaultValue=DEFAULT_STRING_VALUE elif type==DataTypeValue['unsignedInt'] or type==DataTypeValue['Int']: defaultValue='0' elif type==DataTypeValue['Boolean']: defaultValue=DEFAULT_BOOL_VALUE return Attribute(name,type,access,NOTIFICATION,NEEDREBOOT,ACCESSLIST,style,valueRange,ACSSPECIAL,defaultValue) #att=attribute("test","Unsigned","readOnly","none","","","status","0-4294967295","","0") #print_attr_info(att) #tmp_data=AttrToString(att) #print tmp_data #write_data_file(tmp_data,'configuration.xml','a') #Some initial value definition CONFIG_XML_FILE='configuration.xml' #The dest xml file NEXT_LEVEL_NUM_FILE='next_level_info.txt' TMP_NODE_BASENAME="" #Record the father node's name BRANCH_NODE_NEXT_LEVEL_NUM=0 #Record the nextlevel num ROOT_NEXT_LEVEL_NUM=0 ROOT_NODE_NAME='InternetGatewayDevice.' ROOT_TAIL_TAG='
' ROOT_HEAD_TAG='
' #print ROOT_HEAD_TAG #Load excel file for processing fname = "sample.xls" bk = xlrd.open_workbook(fname) shxrange = range(bk.nsheets) try: sh = bk.sheet_by_name("Sheet1") except: print "no sheet in %s named Sheet1" % fname sys.exit(1) nrows = sh.nrows ncols = sh.ncols print "total rows: %d, total cols: %d" % (nrows,ncols) cell_value = sh.cell_value(2,0) #print "The 1st column title: %s" % (cell_value) #print "type:",sh.cell_type(3,0) row_list = [] for i in range(0,nrows): row_data = sh.row_values(i) #print sh.row_len(i) row_list.append(row_data) for i in range(0,nrows): str = sh.cell_value(i,0) if str.startswith(ROOT_NODE_NAME): start_index = i #print 'start_index:',i break #Write root node head tag write_data_file(ROOT_HEAD_TAG,CONFIG_XML_FILE,'a') for i in range(start_index,nrows): #print "%s,len:%d" % (sh.cell_value(i,0),len(sh.cell_value(i,0))) #str = sh.cell_value(i,0) #len_str = len(sh.cell_value(i,0)) str = sh.cell_value(i,0) #print "i:%d,%s" % (i, str) if len(str) == 0: continue #print str if str[-1:] == '.': #dump a node to file #node_data=node(str,"Object",ACSSPECIAL,0) if str != ROOT_NODE_NAME: ROOT_NEXT_LEVEL_NUM += 1 if TMP_NODE_BASENAME != str: if len(TMP_NODE_BASENAME) > 0 and str != ROOT_NODE_NAME and TMP_NODE_BASENAME != ROOT_NODE_NAME: #We should write xml tail tag before write a new item tmp_data=NodeTailString(TMP_NODE_BASENAME) write_data_file(tmp_data,CONFIG_XML_FILE,'a') #print 'Node:%s,NextLevelNum:%d' % (TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM) tmp_level_info = NodeLevelInfo(TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM) tmp_data=LevelInfoToString(tmp_level_info) write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a') BRANCH_NODE_NEXT_LEVEL_NUM = 0 TMP_NODE_BASENAME=str if TMP_NODE_BASENAME != ROOT_NODE_NAME: node_data=Node(str,"Object",ACSSPECIAL,0) tmp_data=NodeToString(node_data) #print TMP_NODE_BASENAME write_data_file(tmp_data,CONFIG_XML_FILE,'a') else: #dump attribute to file if TMP_NODE_BASENAME == ROOT_NODE_NAME: ROOT_NEXT_LEVEL_NUM += 1 else: BRANCH_NODE_NEXT_LEVEL_NUM += 1 #Write attribute to file #VALID_COLUM=3 #We just need the pre-three colums,actually. #print TMP_NODE_BASENAME+str.strip(), #print "ValueType:%s,R/W:%s" % (sh.cell_value(i,CELL_VALUE_TYPE),sh.cell_value(i,CELL_RW_TYPE)) tmp_attr=retrieve_attribute_instance(i,sh) #print_attr_info(tmp_attr) tmp_data=AttrToString(tmp_attr) write_data_file(tmp_data,CONFIG_XML_FILE,'a') #Has travelled to the last branch if i == nrows -1: #write xml tail tag. #print 'Node:%s,NextLevelNum:%d' % (TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM) tmp_level_info = NodeLevelInfo(TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM) tmp_data=LevelInfoToString(tmp_level_info) write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a') tmp_data=NodeTailString(TMP_NODE_BASENAME) write_data_file(tmp_data,CONFIG_XML_FILE,'a') #Write root node tail tag write_data_file(ROOT_TAIL_TAG,CONFIG_XML_FILE,'a') #Write root node nextlevelnum info tmp_level_info = NodeLevelInfo(ROOT_NODE_NAME,ROOT_NEXT_LEVEL_NUM) tmp_data=LevelInfoToString(tmp_level_info) write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a') #unload sheet to free memory bk.unload_sheet("Sheet1") print 'File create Done!'