機械学習プロジェクト(六)医療ナレッジグラフ構築(四)

140805 ワード

Neo4j
Neo4jは、構造化されたデータをテーブルではなくネットワーク(グラフ)として格納する、高性能なNoSQLグラフデータベースです。
Node:グラフデータベース内のすべてのエンティティ(ノード)。Relationship:ノード間のすべてのエッジ(関係)。NodeMatcher:ノードの検索。これらを用いて、ノードとエッジの追加・削除・変更・取得を行います。
#coding:utf-8
from py2neo import Graph, Node, Relationship,NodeMatcher
import pandas as pd
import re
# Vocabulary that create_kg tests hierarchy parents against (`in buwei`).
# NOTE(review): every entry is whitespace-only in this copy of the file;
# presumably the original non-ASCII terms were lost -- confirm against the
# real source before relying on these lists.
buwei = ['  ', '    ', '  ', ' ', '    ', '  ', ' ', '  ', '  ', ' ', '  ', '  ', '  ', '  ', '    ', '    ', '  ', '  ', ' ']
# Second vocabulary that create_kg tests hierarchy parents against (`in keshi`).
keshi = ['  ', '   ', '   ', '   ', '   ', '     ', '   ', '    ', '   ', '   ', '     ', '    ', '    ', '    ', '      ', '  ', '    ', '    ', '   ', '   ', '  ', '   ', '    ', '   ', '   ', '   ', '    ', '   ', '    ', '    ', '   ', '    ', '    ', '     ', '    ', '   ', '    ', '   ', '  ', '  ', '    ', '   ', '   ', '   ', '    ', '   ', '     ', '   ', '    ', '     ', '  ', '   ', '  ', '   ',
         '    ', '    ', '    ', '    ', '    ', '    ', '    ', '  ', '  ']
# Names create_kg has already counted once in its xunyiwenyao_entity tally,
# one tracker per kind of parent node (buwei / keshi / clinical medicine).
buwei_show_list = []
keshi_show_list = []
linchuang_show_list = []

# from .get_level import get_level
from zhishi_tupu.get_level import get_level

def _get_or_create_node(graph, matcher, seen, label, name):
    """Return ``(node, created)`` for the ``label`` node called ``name``.

    ``seen`` holds the names already created under this label during the
    current run.  A new name is written to ``graph`` and added to ``seen``;
    a known name is looked up through ``matcher`` instead, in which case the
    node is whatever ``.first()`` yields.
    """
    if name in seen:
        return matcher.match(label).where(name=name).first(), False
    node = Node(label, name=name)
    graph.create(node)
    seen.add(name)
    return node, True


def _print_counts(header, counts):
    """Print ``header`` formatted with the sum of ``counts``, then one ``key | value`` row per entry."""
    print(header.format(sum(counts.values())))
    for k, v in counts.items():
        print(k, '|', v)


def create_kg():
    """Build the knowledge graph in the local Neo4j database from the spreadsheet.

    Steps, in order:

    1. Connect to Neo4j and wipe it, then call ``get_level()`` which loads the
       hierarchy files and returns the hierarchy lists plus the graph handle.
    2. Read the Excel sheet with pandas and, for every row, create (or look
       up) one node per column value and link it to the row's SKU node.
    3. Attach diseases and symptoms to the hierarchy parents returned by
       ``get_level()`` and attach SKUs to the clinical-medicine hierarchy.
    4. Print three tallies: created entities, hierarchy parents that were
       actually referenced, and created relationships.

    Returns nothing; all results are side effects on the database and stdout.

    NOTE(review): many string literals below (column names, labels, counter
    keys) are whitespace-only and several collide with each other in this
    copy of the file; they are reproduced unchanged.  Presumably non-ASCII
    text was lost -- confirm against the real source.
    """
    graph = Graph("http://localhost:7474", username="neo4j", password='')
    # The matcher stays bound to this first handle; ``graph`` itself is
    # rebound to the handle returned by get_level() below (same server URL).
    matcher = NodeMatcher(graph)
    graph.delete_all()

    # Names already created in this run, one set per node kind.
    skus = set()
    trade_names = set()
    generic_names = set()
    specifications = set()
    first_commoditys = set()
    second_commoditys = set()
    third_commoditys = set()
    OTC_labels = set()
    medicine_classifications = set()
    diseases = set()
    symptoms = set()
    components = set()
    entity_num = {
        '  SKU  ':0,'    ':0,'    ':0,'  ':0,'      ':0,'      ':0,'      ':0,'OTC    ':0,'    ':0,'  ':0,'  ':0,'  ':0,
    }
    relationgship_num = {
        '     ':0,'     ':0,'   ':0,'  ':0,'  ':0,'  ':0,'  ':0
    }
    xunyiwenyao_entity = {
        '  ':0,'  ':0,'    ':0
    }
    xunyiwenyao_level, linchuang_medicine_level, graph = get_level()

    data = pd.read_excel('    .xlsx')

    def cell(column, row):
        # Every value is handled as text, exactly as read from the sheet.
        return str(data[column].loc[row])

    def entity(seen, count_key, label, name):
        # Get-or-create that also maintains the entity tally.
        node, created = _get_or_create_node(graph, matcher, seen, label, name)
        if created:
            entity_num[count_key] += 1
        return node, created

    def link(start, rel_type, end):
        # The relationship tally is keyed by the relationship type itself.
        graph.create(Relationship(start, rel_type, end))
        relationgship_num[rel_type] += 1

    def mark_shown(shown, count_key, name):
        # Count each referenced hierarchy parent only once.
        if name not in shown:
            xunyiwenyao_entity[count_key] += 1
            shown.append(name)

    for i in range(data.shape[0]):
        sku_node, _ = entity(seen=skus, count_key='  SKU  ', label='SKU',
                             name=cell('  SKU  ', i))

        trade_name_node, _ = entity(seen=trade_names, count_key='    ', label='    ',
                                    name=cell('    ', i))
        link(sku_node, '     ', trade_name_node)

        generic_name = cell('    ', i)
        generic_name_node, _ = entity(seen=generic_names, count_key='    ', label='    ',
                                      name=generic_name)
        link(sku_node, '     ', generic_name_node)

        specification_node, _ = entity(seen=specifications, count_key='  ', label='  ',
                                       name=cell('  ', i))
        link(sku_node, '   ', specification_node)

        # Three-level commodity chain: a level is linked to its parent only
        # the first time it is created; the lowest level links to the SKU
        # on every row.
        first_commodity_node, _ = entity(seen=first_commoditys, count_key='      ',
                                         label='      ', name=cell('      ', i))
        second_commodity_node, is_new = entity(seen=second_commoditys, count_key='      ',
                                               label='      ', name=cell('      ', i))
        if is_new:
            link(first_commodity_node, '  ', second_commodity_node)
        third_commodity_node, is_new = entity(seen=third_commoditys, count_key='      ',
                                              label='      ', name=cell('      ', i))
        if is_new:
            link(second_commodity_node, '  ', third_commodity_node)
        link(third_commodity_node, '  ', sku_node)

        OTC_label_node, _ = entity(seen=OTC_labels, count_key='OTC    ', label='OTC    ',
                                   name=cell('OTC  ', i))
        link(OTC_label_node, '  ', sku_node)

        medicine_classification_node, _ = entity(seen=medicine_classifications,
                                                 count_key='    ', label='    ',
                                                 name=cell('  ', i))
        link(medicine_classification_node, '  ', sku_node)

        disease_list = re.split(pattern=',|,', string=cell('  ', i))
        for disease in disease_list:
            disease_node, _ = entity(seen=diseases, count_key='  ', label='  ', name=disease)
            link(sku_node, '  ', disease_node)
            for j in xunyiwenyao_level:
                if disease != j[-1]:
                    continue
                parent = j[-2]
                if parent in keshi:
                    disease_frot1 = matcher.match('  ').where(name=parent).first()
                    link(disease_frot1, '  ', disease_node)
                    mark_shown(keshi_show_list, '  ', parent)
                elif parent in buwei:
                    disease_frot2 = matcher.match('  ').where(name=parent).first()
                    link(disease_frot2, '  ', disease_node)
                    mark_shown(buwei_show_list, '  ', parent)

        # Fix: names are stripped once, up front.  The previous code stored
        # the stripped name on the node but remembered and re-queried the
        # unstripped one, so a padded name could never be found again.
        symptom_list = [s.strip() for s in re.split(pattern=',|,', string=cell('  ', i))]
        for symptom in symptom_list:
            symptom_node, _ = entity(seen=symptoms, count_key='  ', label='  ', name=symptom)
            try:
                link(sku_node, '  ', symptom_node)
            except Exception:
                # Best effort, as before: report the pair and keep going.
                print(sku_node, symptom_node)
            for j in xunyiwenyao_level:
                if symptom != j[-1]:
                    continue
                parent = j[-2]
                disease_frot1 = matcher.match('  ').where(name=parent).first()
                disease_frot2 = matcher.match('  ').where(name=parent).first()
                if disease_frot1 is not None:
                    link(disease_frot1, '  ', symptom_node)
                    mark_shown(keshi_show_list, '  ', parent)
                if disease_frot2 is not None:
                    link(disease_frot2, '  ', symptom_node)
                    mark_shown(buwei_show_list, '  ', parent)

        for medicine_name in linchuang_medicine_level:
            if generic_name not in medicine_name:
                continue
            # Fix: ``.first()`` was missing, so the lazy NodeMatch query
            # object was handed to Relationship instead of the matched node.
            medicine_name_node = matcher.match('    ').where(name=medicine_name[-2]).first()
            if medicine_name_node is None:
                continue
            mark_shown(linchuang_show_list, '    ', medicine_name[-2])
            link(medicine_name_node, '  ', sku_node)

        component_list = re.split(pattern=',|,', string=cell('  ', i).strip())
        for component in component_list:
            component_node, _ = entity(seen=components, count_key='  ', label='  ',
                                       name=component)
            link(sku_node, '  ', component_node)

        # Link every symptom of the row to every disease of the row.
        for disease in disease_list:
            disease_node = matcher.match('  ').where(name=disease).first()
            for symptom in symptom_list:
                symptom_node = matcher.match('  ').where(name=symptom).first()
                if symptom_node is None:
                    symptom_node = Node('  ', name=symptom)
                    graph.create(symptom_node)
                try:
                    link(symptom_node, '  ', disease_node)
                except Exception:
                    print(symptom_node, disease_node)

    _print_counts('         demo     {} ,      :\n|\n:-:|:-:', entity_num)
    print('\n')
    _print_counts(' {} , :', xunyiwenyao_entity)
    # Fix: the relationship total previously added 1 per dictionary entry
    # (``zong+=1``) instead of adding the stored counts.
    _print_counts(' demo {} ,\n|\n:-:|:-:', relationgship_num)
    print('\n')


if __name__ =='__main__':
    create_kg()

尋医問薬(xunyiwenyao)の4つの階層ファイルと臨床用薬のエンティティを抽出し、Neo4jデータベースに直接格納します。各階層リストの最後の要素は具体的な疾患であり、それより前の要素は科室または部位を表す語で、これらはエンティティとして直接Neo4jのグラフデータに登録されます。コードを実行する前にローカルのグラフデータベースへ接続できるようにしておいてください。アカウントとパスワードはset_config関数内で変更します。
from py2neo import Graph, Node, Relationship,NodeMatcher

def set_config():
    """Connect to the local Neo4j server, wipe it, and reset module state.

    Rebinds these module globals: the ``graph`` handle and its ``matcher``,
    the two hierarchy accumulators with their seen-name lists, the entity and
    relationship tallies, and the ``buwei`` / ``keshi`` vocabularies.

    Warning: ``graph.delete_all()`` is called on every invocation.
    Change the account and password in the ``Graph(...)`` call below.
    """
    global xunyiwenyao_level,xunyiwenyao_node,linchuang_medicine_level,linchuang_medicine_node,entity_num,relation_num,buwei,keshi,graph,matcher

    graph = Graph("http://localhost:7474", username="neo4j", password='')
    matcher = NodeMatcher(graph)
    graph.delete_all()

    # Hierarchy rows collected from the xunyiwenyao files, and the names of
    # the nodes already created for them.
    xunyiwenyao_level, xunyiwenyao_node = [], []
    # Same pair for the clinical-medicine hierarchy.
    linchuang_medicine_level, linchuang_medicine_node = [], []

    # Tallies, all starting at zero.
    entity_num = dict.fromkeys(('  ', '  ', '    '), 0)
    relation_num = dict.fromkeys(('  ',), 0)

    buwei = ['  ', '    ', '  ', ' ', '    ', '  ', ' ', '  ', '  ', ' ', '  ', '  ', '  ', '  ', '    ', '    ', '  ', '  ', ' ']
    keshi = ['  ', '   ', '   ', '   ', '   ', '     ', '   ', '    ', '   ', '   ', '     ', '    ', '    ', '    ', '      ', '  ', '    ', '    ', '   ', '   ', '  ', '   ', '    ', '   ', '   ', '   ', '    ', '   ', '    ', '    ', '   ', '    ', '    ', '     ', '    ', '   ', '    ', '   ', '  ', '  ', '    ', '   ', '   ', '   ', '    ', '   ', '     ', '   ', '    ', '     ', '  ', '   ', '  ', '   ',
             '    ', '    ', '    ', '    ', '    ', '    ', '    ', '  ', '  ']

# Node label (also used as the entity_num key) for names found in ``keshi``
# and in ``buwei`` respectively.
_KESHI_LABEL = '  '
_BUWEI_LABEL = '  '


def _split_hierarchy_line(line, clean):
    """Split one hierarchy line into ``(ancestors, leaf)``.

    The second tab-separated field is split on ``-->``.  The last item is the
    leaf; every other non-empty item that differs from the leaf is passed
    through ``clean`` and kept as an ancestor, in order.
    """
    fields = line.strip().split('\t')[1].split('-->')
    leaf = fields[-1]
    ancestors = [clean(item) for item in fields if item != leaf and item != '']
    return ancestors, leaf


def _create_counted_node(label, name):
    """Create a ``label`` node called ``name``, bump its tally, and return it."""
    node = Node(label, name=name)
    graph.create(node)
    entity_num[label] += 1
    return node


def _load_flat_hierarchy(path, label):
    """Load a hierarchy file whose ancestors all share one node label.

    For every line, each ancestor gets a ``label`` node (created once) and an
    edge from the ancestor before it.  The ancestor chain plus the line's own
    leaf is appended to ``xunyiwenyao_level``.
    """
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            chain, leaf = _split_hierarchy_line(line, lambda s: s.lstrip('\"'))
            if chain[0] not in xunyiwenyao_node:
                _create_counted_node(label, chain[0])
                xunyiwenyao_node.append(chain[0])
            for j in range(1, len(chain)):
                if chain[j] not in xunyiwenyao_node:
                    this_node = _create_counted_node(label, chain[j])
                    xunyiwenyao_node.append(chain[j])
                else:
                    this_node = matcher.match(label).where(name=chain[j]).first()
                front_node = matcher.match(label).where(name=chain[j - 1]).first()
                graph.create(Relationship(front_node, '  ', this_node))
                relation_num['  '] += 1
            # Fix: the second file section used to append ``List1[-1]``, a
            # variable left over from the previous file's last line, so every
            # row was stored with the wrong leaf.
            chain.append(leaf)
            xunyiwenyao_level.append(chain)


def _load_typed_hierarchy(path):
    """Load a hierarchy file whose ancestors are typed via ``buwei`` / ``keshi``.

    An ancestor listed in neither vocabulary gets no node.  An edge is only
    created when both of its end nodes were resolved for this very pair;
    otherwise the pair is printed and skipped.
    """
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            chain, leaf = _split_hierarchy_line(line, str.strip)
            head = chain[0]
            if head not in xunyiwenyao_node:
                if head in keshi:
                    _create_counted_node(_KESHI_LABEL, head)
                elif head in buwei:
                    # Fix: one of the two copies of this branch forgot the tally.
                    _create_counted_node(_BUWEI_LABEL, head)
                xunyiwenyao_node.append(head)

            for j in range(1, len(chain)):
                name, parent = chain[j], chain[j - 1]
                is_new = name not in xunyiwenyao_node

                # Fix: start from None on every pair so a node resolved for an
                # earlier pair (or an earlier line) can never be reused here.
                this_node = None
                if name in buwei:
                    if not is_new:
                        this_node = matcher.match(_BUWEI_LABEL).where(name=name).first()
                    if this_node is None:
                        # New name, or a recorded name with no node under this label.
                        this_node = _create_counted_node(_BUWEI_LABEL, name)
                        if is_new:
                            xunyiwenyao_node.append(name)
                elif name in keshi:
                    if is_new:
                        this_node = _create_counted_node(_KESHI_LABEL, name)
                        xunyiwenyao_node.append(name)
                    else:
                        this_node = matcher.match(_KESHI_LABEL).where(name=name).first()

                front_node = None
                if parent in buwei:
                    front_node = matcher.match(_BUWEI_LABEL).where(name=parent).first()
                elif parent in keshi:
                    front_node = matcher.match(_KESHI_LABEL).where(name=parent).first()

                if front_node is None or this_node is None:
                    print(front_node, this_node)
                    continue
                try:
                    graph.create(Relationship(front_node, '  ', this_node))
                    relation_num['  '] += 1
                except Exception:
                    # Best effort, as in the original third section.
                    print(front_node, this_node)

            chain.append(leaf)
            xunyiwenyao_level.append(chain)


def get_depaetment_paert_from_xunyiwenyao():
    """Load the four xunyiwenyao hierarchy files into the graph.

    Relies on the module globals prepared by ``set_config()`` (``graph``,
    ``matcher``, the tallies, the vocabularies and the accumulators).

    Returns:
        ``(xunyiwenyao_level, xunyiwenyao_node, graph)`` -- the collected
        hierarchy rows (ancestors followed by the leaf), the names recorded
        while loading, and the graph handle.

    NOTE(review): the four path literals (and the two labels) are
    byte-identical in this copy of the file; presumably they originally named
    four different files and two different labels -- confirm.
    """
    _load_flat_hierarchy('data/xunyiwenyao/    --  --  .txt', '  ')
    _load_flat_hierarchy('data/xunyiwenyao/    --  --  .txt', '  ')
    _load_typed_hierarchy('data/xunyiwenyao/    --  --  .txt')
    _load_typed_hierarchy('data/xunyiwenyao/    --  --  .txt')
    return xunyiwenyao_level, xunyiwenyao_node, graph

def get_medicine_level():
    """Load the clinical-medicine hierarchy file into the graph.

    Each line is split on ``->``.  The last item is the leaf; every other
    non-empty item that differs from the leaf becomes a node (created once)
    linked from the item before it.  The chain plus its leaf is appended to
    ``linchuang_medicine_level``.

    Relies on the module globals prepared by ``set_config()``.

    Returns:
        ``(linchuang_medicine_level, linchuang_medicine_node, graph)``.
    """
    with open('data/medicine_level/                     .txt','r',encoding='utf-8') as handle:
        for raw in handle:
            tokens = raw.strip().split('->')
            leaf = tokens[-1]
            chain = [tok.lstrip('\"') for tok in tokens if tok != leaf and tok != '']

            root = chain[0]
            if root not in linchuang_medicine_node:
                graph.create(Node('    ', name=root))
                entity_num['    '] += 1
                linchuang_medicine_node.append(root)

            # Walk consecutive (parent, child) pairs of the chain.
            for parent_name, child_name in zip(chain, chain[1:]):
                if child_name in linchuang_medicine_node:
                    child = matcher.match('    ').where(name=child_name).first()
                else:
                    child = Node('    ', name=child_name)
                    entity_num['    '] += 1
                    graph.create(child)
                    linchuang_medicine_node.append(child_name)
                parent = matcher.match('    ').where(name=parent_name).first()
                graph.create(Relationship(parent, '  ', child))
                relation_num['  '] += 1

            chain.append(leaf)
            linchuang_medicine_level.append(chain)
    return linchuang_medicine_level, linchuang_medicine_node, graph



def get_level():
    """Reset the database, load both hierarchies, and report what was created.

    Calls ``set_config()`` (which wipes the graph), loads the xunyiwenyao and
    clinical-medicine hierarchies, writes every xunyiwenyao row to a text file
    as ``a-->b-->leaf`` (one row per line), and prints the entity and
    relationship tallies.

    Returns:
        ``(xunyiwenyao_level, linchuang_medicine_level, graph)``.
    """
    set_config()
    xunyiwenyao_level, xunyiwenyao_node, graph = get_depaetment_paert_from_xunyiwenyao()
    linchuang_medicine_level, linchuang_medicine_node, graph = get_medicine_level()

    with open('    /              .txt','w',encoding='utf-8') as f:
        for i in xunyiwenyao_level:
            f.write('-->'.join(i))
            # Reconstructed: the escaped newline had been turned into a real
            # line break inside the string literal, which is a syntax error.
            f.write('\n')

    print(' {} , :\n|\n:-:|:-:'.format(sum(entity_num.values())))
    for k, v in entity_num.items():
        print(k, '|', v)
    print('\n')

    print(' {} , :\n|\n:-:|:-:'.format(sum(relation_num.values())))
    for k, v in relation_num.items():
        print(k, '|', v)
    print('\n')

    return xunyiwenyao_level, linchuang_medicine_level, graph


if __name__ =='__main__':
    get_level()