機械学習プロジェクト(六)医療ナレッジグラフ構築(四)
140805 ワード
Neo4j
Neo4jは、構造化データをテーブルではなくネットワーク(グラフ)として格納する高性能なNoSQLグラフデータベースです。
Node:グラフデータベース内のエンティティノード。Relationship:ノード間のエッジ(関係)で、追加・削除・更新・検索の対象となる。NodeMatcher:ノードの検索に使用する。
尋医問薬(xunyiwenyao)の4つの階層ファイルと臨床用薬のエンティティを抽出し、Neo4jデータベースに直接格納します。各階層リストの最後の要素は具体的な疾患で、それより前の要素は診療科または部位を表す語であり、これらをエンティティとしてNeo4jグラフデータベースに登録します。そのため、コードを実行する前にローカルのグラフデータベースへ接続できるようにしておく必要があります。アカウントとパスワードはset_config関数内で変更してください。
Neo4jは、構造化データをテーブルではなくネットワーク(グラフ)として格納する高性能なNoSQLグラフデータベースです。
Node:グラフデータベース内のエンティティノード。Relationship:ノード間のエッジ(関係)で、追加・削除・更新・検索の対象となる。NodeMatcher:ノードの検索に使用する。
#coding:utf-8
from py2neo import Graph, Node, Relationship,NodeMatcher
import pandas as pd
import re
# Vocabulary list consulted by create_kg() via `j[-2] in buwei`.
# NOTE(review): every entry reads as ' ' here — the original non-ASCII terms
# appear to have been lost in extraction; restore them from the original source.
# Presumably these are body-part terms — TODO confirm.
buwei = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
# Vocabulary list consulted by create_kg() via `j[-2] in keshi`.
# NOTE(review): same loss of the original terms as above.
# Presumably these are hospital-department terms — TODO confirm.
keshi = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
# Names from `buwei` that create_kg() has already linked at least once.
buwei_show_list = []
# Names from `keshi` that create_kg() has already linked at least once.
keshi_show_list = []
# Clinical-medicine hierarchy names that create_kg() has already linked at least once.
linchuang_show_list = []
# from .get_level import get_level
from zhishi_tupu.get_level import get_level
def _get_or_create_node(graph, matcher, label, name, seen, counter, counter_key):
    """Fetch the ``label`` node called ``name``, creating it on first sight.

    Args:
        graph: py2neo ``Graph`` used to create new nodes.
        matcher: py2neo ``NodeMatcher`` used to look up existing nodes.
        label: Node label to create or match.
        name: Value of the node's ``name`` property.
        seen: List of names already created under ``label``; the name is
            appended when a node is created.
        counter: Dict of per-type entity counts; ``counter[counter_key]`` is
            incremented when a node is created.
        counter_key: Key into ``counter``.

    Returns:
        ``(node, created)`` where ``created`` is True when the node was newly
        created by this call.
    """
    if name in seen:
        return matcher.match(label).where(name=name).first(), False
    node = Node(label, name=name)
    graph.create(node)
    counter[counter_key] += 1
    seen.append(name)
    return node, True


def _add_relationship(graph, start_node, rel_type, end_node, counter, counter_key):
    """Create ``start_node -[rel_type]-> end_node`` and bump ``counter[counter_key]``."""
    graph.create(Relationship(start_node, rel_type, end_node))
    counter[counter_key] += 1


def create_kg():
    """Build the drug knowledge graph from the spreadsheet and print statistics.

    Steps, in order:
      1. Connect to the local Neo4j instance and wipe it.
      2. Call ``get_level()`` to load the hierarchy data (it returns the two
         hierarchy lists and a graph handle that replaces the local one).
      3. For every spreadsheet row, get-or-create one node per column value and
         link it to the row's SKU node; link diseases/symptoms to hierarchy
         nodes whose last element matches; link symptoms to diseases.
      4. Print entity and relationship counts.

    NOTE(review): almost every label, dict key, column name and file name in
    this function reads as ' ' — the original non-ASCII text appears to have
    been lost in extraction. The literals are kept exactly as found; restore
    them from the original source before running.
    """
    graph = Graph("http://localhost:7474", username="neo4j", password='')
    matcher = NodeMatcher(graph)
    graph.delete_all()  # destructive: removes every node and relationship

    # Names already created, one list per node type.
    skus = []
    trade_names = []
    generic_names = []
    specifications = []
    first_commoditys = []
    second_commoditys = []
    third_commoditys = []
    OTC_labels = []
    medicine_classifications = []
    diseases = []
    symptoms = []
    components = []

    # Per-type counters reported at the end.
    entity_num ={
    ' SKU ':0,' ':0,' ':0,' ':0,' ':0,' ':0,' ':0,'OTC ':0,' ':0,' ':0,' ':0,' ':0,
    }
    relationgship_num ={
    ' ':0,' ':0,' ':0,' ':0,' ':0,' ':0,' ':0
    }
    xunyiwenyao_entity ={
    ' ':0,' ':0,' ':0
    }

    xunyiwenyao_level, linchuang_medicine_level, graph = get_level()
    data = pd.read_excel(' .xlsx')

    for i in range(data.shape[0]):
        # --- SKU and its directly attached attributes -----------------------
        sku = str(data[' SKU '].loc[i])
        sku_node, _ = _get_or_create_node(
            graph, matcher, 'SKU', sku, skus, entity_num, ' SKU ')

        trade_name = str(data[' '].loc[i])
        trade_name_node, _ = _get_or_create_node(
            graph, matcher, ' ', trade_name, trade_names, entity_num, ' ')
        _add_relationship(graph, sku_node, ' ', trade_name_node, relationgship_num, ' ')

        generic_name = str(data[' '].loc[i])
        generic_name_node, _ = _get_or_create_node(
            graph, matcher, ' ', generic_name, generic_names, entity_num, ' ')
        _add_relationship(graph, sku_node, ' ', generic_name_node, relationgship_num, ' ')

        specification = str(data[' '].loc[i])
        specification_node, _ = _get_or_create_node(
            graph, matcher, ' ', specification, specifications, entity_num, ' ')
        _add_relationship(graph, sku_node, ' ', specification_node, relationgship_num, ' ')

        # --- Three-level commodity chain; a parent edge is added only when
        # --- the child node is first created ---------------------------------
        first_commodity = str(data[' '].loc[i])
        first_commodity_node, _ = _get_or_create_node(
            graph, matcher, ' ', first_commodity, first_commoditys, entity_num, ' ')

        second_commodity = str(data[' '].loc[i])
        second_commodity_node, created = _get_or_create_node(
            graph, matcher, ' ', second_commodity, second_commoditys, entity_num, ' ')
        if created:
            _add_relationship(graph, first_commodity_node, ' ', second_commodity_node,
                              relationgship_num, ' ')

        third_commodity = str(data[' '].loc[i])
        third_commodity_node, created = _get_or_create_node(
            graph, matcher, ' ', third_commodity, third_commoditys, entity_num, ' ')
        if created:
            _add_relationship(graph, second_commodity_node, ' ', third_commodity_node,
                              relationgship_num, ' ')
        _add_relationship(graph, third_commodity_node, ' ', sku_node, relationgship_num, ' ')

        OTC_label = str(data['OTC '].loc[i])
        OTC_label_node, _ = _get_or_create_node(
            graph, matcher, 'OTC ', OTC_label, OTC_labels, entity_num, 'OTC ')
        _add_relationship(graph, OTC_label_node, ' ', sku_node, relationgship_num, ' ')

        medicine_classification = str(data[' '].loc[i])
        medicine_classification_node, _ = _get_or_create_node(
            graph, matcher, ' ', medicine_classification, medicine_classifications,
            entity_num, ' ')
        _add_relationship(graph, medicine_classification_node, ' ', sku_node,
                          relationgship_num, ' ')

        # --- Diseases ---------------------------------------------------------
        disease_list = str(data[' '].loc[i])
        disease_list = re.split(pattern=',|,', string=disease_list)
        for disease in disease_list:
            disease_node, _ = _get_or_create_node(
                graph, matcher, ' ', disease, diseases, entity_num, ' ')
            _add_relationship(graph, sku_node, ' ', disease_node, relationgship_num, ' ')
            # Attach the disease to the hierarchy entry whose last element is
            # this disease, through that entry's second-to-last element.
            for j in xunyiwenyao_level:
                if disease != j[-1]:
                    continue
                if j[-2] in keshi:
                    disease_frot1 = matcher.match(' ').where(name=j[-2]).first()
                    _add_relationship(graph, disease_frot1, ' ', disease_node,
                                      relationgship_num, ' ')
                    if j[-2] not in keshi_show_list:
                        xunyiwenyao_entity[' '] += 1
                        keshi_show_list.append(j[-2])
                elif j[-2] in buwei:
                    disease_frot2 = matcher.match(' ').where(name=j[-2]).first()
                    _add_relationship(graph, disease_frot2, ' ', disease_node,
                                      relationgship_num, ' ')
                    if j[-2] not in buwei_show_list:
                        xunyiwenyao_entity[' '] += 1
                        buwei_show_list.append(j[-2])

        # --- Symptoms ---------------------------------------------------------
        symptom_list = str(data[' '].loc[i])
        # Strip once, up front: the node used to be created under the stripped
        # name but recorded and looked up under the unstripped one, so later
        # matches failed and duplicate nodes were created.
        symptom_list = [s.strip() for s in re.split(pattern=',|,', string=symptom_list)]
        for symptom in symptom_list:
            symptom_node, _ = _get_or_create_node(
                graph, matcher, ' ', symptom, symptoms, entity_num, ' ')
            try:
                _add_relationship(graph, sku_node, ' ', symptom_node, relationgship_num, ' ')
            except Exception:
                # Best effort: report the pair and carry on with the row.
                print(sku_node,symptom_node)
            for j in xunyiwenyao_level:
                if symptom != j[-1]:
                    continue
                disease_frot1 = matcher.match(' ').where(name=j[-2]).first()
                disease_frot2 = matcher.match(' ').where(name=j[-2]).first()
                if disease_frot1 is not None:
                    _add_relationship(graph, disease_frot1, ' ', symptom_node,
                                      relationgship_num, ' ')
                    if j[-2] not in keshi_show_list:
                        xunyiwenyao_entity[' '] += 1
                        keshi_show_list.append(j[-2])
                if disease_frot2 is not None:
                    _add_relationship(graph, disease_frot2, ' ', symptom_node,
                                      relationgship_num, ' ')
                    if j[-2] not in buwei_show_list:
                        xunyiwenyao_entity[' '] += 1
                        buwei_show_list.append(j[-2])

        # --- Clinical-medicine hierarchy -> SKU ---------------------------------
        for medicine_name in linchuang_medicine_level:
            if generic_name in medicine_name:
                # .first() resolves the query to a node; the bare match object
                # is not a node and cannot be a Relationship endpoint.
                medicine_name_node = matcher.match(' ').where(name=medicine_name[-2]).first()
                if medicine_name[-2] not in linchuang_show_list:
                    xunyiwenyao_entity[' '] += 1
                    linchuang_show_list.append(medicine_name[-2])
                _add_relationship(graph, medicine_name_node, ' ', sku_node,
                                  relationgship_num, ' ')

        # --- Components ---------------------------------------------------------
        component_list = str(data[' '].loc[i]).strip()
        component_list = re.split(pattern=',|,', string=component_list)
        for component in component_list:
            component_node, _ = _get_or_create_node(
                graph, matcher, ' ', component, components, entity_num, ' ')
            _add_relationship(graph, sku_node, ' ', component_node, relationgship_num, ' ')

        # --- Symptom -> disease edges for every pair in this row ----------------
        for disease in disease_list:
            disease_node = matcher.match(' ').where(name=disease).first()
            for symptom in symptom_list:
                symptom_node = matcher.match(' ').where(name=symptom).first()
                if symptom_node is None:
                    symptom_node = Node(' ',name = symptom)
                    graph.create(symptom_node)
                try:
                    _add_relationship(graph, symptom_node, ' ', disease_node,
                                      relationgship_num, ' ')
                except Exception:
                    print(symptom_node,disease_node)

    # --- Statistics -------------------------------------------------------------
    zong = sum(entity_num.values())
    print(' demo {} , :\n|\n:-:|:-:'.format(zong))
    for k,v in entity_num.items():
        print(k,'|',v)
    print('\n')

    zong = sum(xunyiwenyao_entity.values())
    print(' {} , :'.format(zong))
    for k,v in xunyiwenyao_entity.items():
        print(k,'|',v)

    # Sum the relationship counts (this used to add 1 per relationship type).
    zong = sum(relationgship_num.values())
    print(' demo {} ,\n|\n:-:|:-:'.format(zong))
    for k,v in relationgship_num.items():
        print(k,'|',v)
    print('\n')
# Script entry point: build the whole knowledge graph when run directly.
if __name__ =='__main__':
    create_kg()
尋医問薬(xunyiwenyao)の4つの階層ファイルと臨床用薬のエンティティを抽出し、Neo4jデータベースに直接格納します。各階層リストの最後の要素は具体的な疾患で、それより前の要素は診療科または部位を表す語であり、これらをエンティティとしてNeo4jグラフデータベースに登録します。そのため、コードを実行する前にローカルのグラフデータベースへ接続できるようにしておく必要があります。アカウントとパスワードはset_config関数内で変更してください。
from py2neo import Graph, Node, Relationship,NodeMatcher
def set_config(uri="http://localhost:7474", username="neo4j", password=''):
    """Connect to Neo4j, wipe it, and reset the module-level working state.

    Args:
        uri: Address of the Neo4j server.
        username: Neo4j account name.
        password: Neo4j account password. Pass it in rather than editing this
            file, so credentials stay out of the source.

    Side effects:
        Rebinds the globals named in the ``global`` statement below and calls
        ``graph.delete_all()``, which removes every node and relationship in
        the connected database.

    NOTE(review): the dict keys and vocabulary entries read as ' ' — the
    original non-ASCII text appears to have been lost in extraction. They are
    kept exactly as found; restore them from the original source.
    """
    global xunyiwenyao_level,xunyiwenyao_node,linchuang_medicine_level,linchuang_medicine_node,entity_num,relation_num,buwei,keshi,graph,matcher
    graph = Graph(uri, username=username, password=password)
    matcher = NodeMatcher(graph)
    graph.delete_all()
    # Hierarchy rows collected while loading the xunyiwenyao files.
    xunyiwenyao_level = []
    # Names of xunyiwenyao nodes already registered.
    xunyiwenyao_node = []
    # Same pair of accumulators for the clinical-medicine hierarchy.
    linchuang_medicine_level = []
    linchuang_medicine_node = []
    # Counters reported by get_level().
    entity_num = {' ':0,' ':0,' ':0}
    relation_num= {' ':0}
    # Vocabularies used to decide which label a hierarchy element gets.
    buwei = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
    keshi = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
def get_depaetment_paert_from_xunyiwenyao():
    """Load the four xunyiwenyao hierarchy files into the graph.

    Each line is split on a tab; the second field is split on ``-->``. Every
    element except the last becomes (or reuses) a node, consecutive elements
    are linked parent -> child, and the full path including the last element is
    appended to ``xunyiwenyao_level``.

    Files 1 and 2 use a single node label each. Files 3 and 4 choose the label
    per element according to membership in ``keshi`` / ``buwei``.

    Relies on the globals initialised by ``set_config()``.

    Returns:
        ``(xunyiwenyao_level, xunyiwenyao_node, graph)``.

    NOTE(review): file names, labels and counter keys read as ' ' — the
    original non-ASCII text appears to have been lost in extraction. They are
    kept exactly as found; restore them from the original source.
    """
    # ---- File 1 ------------------------------------------------------------------
    with open('data/xunyiwenyao/ -- -- .txt','r',encoding='utf-8') as f:
        for line in f:
            raw_parts = line.strip().split('\t')[1].split('-->')
            parts = [p.lstrip('\"') for p in raw_parts if p != raw_parts[-1] and p != '']
            if parts[0] not in xunyiwenyao_node:
                graph.create(Node(' ', name=parts[0]))
                entity_num[' '] += 1
                xunyiwenyao_node.append(parts[0])
            for j in range(1, len(parts)):
                if parts[j] not in xunyiwenyao_node:
                    this_node = Node(' ', name=parts[j])
                    graph.create(this_node)
                    entity_num[' '] += 1
                    xunyiwenyao_node.append(parts[j])
                else:
                    this_node = matcher.match(' ').where(name=parts[j]).first()
                front_node = matcher.match(' ').where(name=parts[j - 1]).first()
                graph.create(Relationship(front_node, ' ', this_node))
                relation_num[' '] += 1
            parts.append(raw_parts[-1])
            xunyiwenyao_level.append(parts)

    # ---- File 2 ------------------------------------------------------------------
    with open('data/xunyiwenyao/ -- -- .txt','r',encoding='utf-8') as f:
        for line in f:
            raw_parts = line.strip().split('\t')[1].split('-->')
            parts = [p.lstrip('\"') for p in raw_parts if p != raw_parts[-1] and p != '']
            if parts[0] not in xunyiwenyao_node:
                graph.create(Node(' ', name=parts[0]))
                entity_num[' '] += 1
                xunyiwenyao_node.append(parts[0])
            for j in range(1, len(parts)):
                if parts[j] not in xunyiwenyao_node:
                    this_node = Node(' ', name=parts[j])
                    entity_num[' '] += 1
                    graph.create(this_node)
                    xunyiwenyao_node.append(parts[j])
                else:
                    this_node = matcher.match(' ').where(name=parts[j]).first()
                front_node = matcher.match(' ').where(name=parts[j - 1]).first()
                graph.create(Relationship(front_node, ' ', this_node))
                relation_num[' '] += 1
            # Append THIS line's last element. The previous code appended a
            # variable left over from the file-1 loop, so every path from this
            # file ended with the last element of file 1's final line.
            parts.append(raw_parts[-1])
            xunyiwenyao_level.append(parts)

    # ---- File 3 (label chosen per element from keshi / buwei) --------------------
    with open('data/xunyiwenyao/ -- -- .txt','r',encoding='utf-8') as f:
        for line in f:
            raw_parts = line.strip().split('\t')[1].split('-->')
            parts = [p.strip() for p in raw_parts if p != raw_parts[-1] and p != '']
            if parts[0] not in xunyiwenyao_node:
                if parts[0] in keshi:
                    graph.create(Node(' ', name=parts[0]))
                    entity_num[' '] += 1
                elif parts[0] in buwei:
                    graph.create(Node(' ',name = parts[0]))
                    entity_num[' '] += 1
                xunyiwenyao_node.append(parts[0])
            for j in range(1, len(parts)):
                # Reset per iteration so a name found in neither vocabulary
                # cannot silently reuse the previous iteration's nodes.
                this_node = None
                front_node = None
                if parts[j] not in xunyiwenyao_node:
                    if parts[j] in buwei:
                        this_node = Node(' ',name =parts[j])
                        entity_num[' '] += 1
                        graph.create(this_node)
                        xunyiwenyao_node.append(parts[j])
                    elif parts[j] in keshi:
                        this_node = Node(' ',name =parts[j])
                        entity_num[' '] += 1
                        graph.create(this_node)
                        xunyiwenyao_node.append(parts[j])
                else:
                    if parts[j] in buwei:
                        this_node = matcher.match(' ').where(name = parts[j]).first()
                        if this_node is None:
                            # Name was registered earlier but no node exists
                            # under this label yet: create it now.
                            this_node = Node(' ', name=parts[j])
                            entity_num[' '] += 1
                            graph.create(this_node)
                            xunyiwenyao_node.append(parts[j])
                    elif parts[j] in keshi:
                        this_node = matcher.match(' ').where(name = parts[j]).first()
                if parts[j - 1] in buwei:
                    front_node = matcher.match(' ').where(name = parts[j - 1]).first()
                elif parts[j - 1] in keshi:
                    front_node = matcher.match(' ').where(name = parts[j - 1]).first()
                if front_node is None or this_node is None:
                    # Nothing to link; report the pair as the old handler did.
                    print(front_node,this_node)
                    continue
                try:
                    graph.create(Relationship(front_node,' ',this_node))
                    relation_num[' '] += 1
                except Exception:
                    # Best effort: report and continue with the next element.
                    print(front_node,this_node)
            parts.append(raw_parts[-1])
            xunyiwenyao_level.append(parts)

    # ---- File 4 (same scheme as file 3) ------------------------------------------
    with open('data/xunyiwenyao/ -- -- .txt','r',encoding='utf-8') as f:
        for line in f:
            raw_parts = line.strip().split('\t')[1].split('-->')
            parts = [p.strip() for p in raw_parts if p != raw_parts[-1] and p != '']
            if parts[0] not in xunyiwenyao_node:
                if parts[0] in keshi:
                    graph.create(Node(' ', name=parts[0]))
                    entity_num[' '] += 1
                elif parts[0] in buwei:
                    graph.create(Node(' ',name = parts[0]))
                    # Count the node, matching the parallel branch for file 3.
                    # NOTE(review): key literal copied from that branch — confirm
                    # it is the right counter once the keys are restored.
                    entity_num[' '] += 1
                xunyiwenyao_node.append(parts[0])
            for j in range(1, len(parts)):
                this_node = None
                front_node = None
                if parts[j] not in xunyiwenyao_node:
                    if parts[j] in buwei:
                        this_node = Node(' ',name =parts[j])
                        entity_num[' '] += 1
                        graph.create(this_node)
                        xunyiwenyao_node.append(parts[j])
                    elif parts[j] in keshi:
                        this_node = Node(' ',name =parts[j])
                        entity_num[' '] += 1
                        graph.create(this_node)
                        xunyiwenyao_node.append(parts[j])
                else:
                    if parts[j] in buwei:
                        this_node = matcher.match(' ').where(name = parts[j]).first()
                    elif parts[j] in keshi:
                        this_node = matcher.match(' ').where(name = parts[j]).first()
                if parts[j - 1] in buwei:
                    front_node = matcher.match(' ').where(name = parts[j - 1]).first()
                elif parts[j - 1] in keshi:
                    front_node = matcher.match(' ').where(name = parts[j - 1]).first()
                if front_node is None or this_node is None:
                    # Skip instead of failing on an unbound name or linking
                    # nodes left over from the previous iteration.
                    print(front_node,this_node)
                    continue
                graph.create(Relationship(front_node,' ',this_node))
                relation_num[' '] += 1
            parts.append(raw_parts[-1])
            xunyiwenyao_level.append(parts)
    return xunyiwenyao_level, xunyiwenyao_node, graph
def get_medicine_level():
    """Load the clinical-medicine hierarchy file into the graph.

    Each line is split on ``->``. Every element except the last becomes (or
    reuses) a node, consecutive elements are linked parent -> child, and the
    full path including the last element is appended to
    ``linchuang_medicine_level``.

    Relies on the globals initialised by ``set_config()``.

    Returns:
        ``(linchuang_medicine_level, linchuang_medicine_node, graph)``.

    NOTE(review): the file name, labels and counter keys read as ' ' — the
    original non-ASCII text appears to have been lost in extraction.
    """
    with open('data/medicine_level/ .txt','r',encoding='utf-8') as handle:
        for raw_line in handle:
            segments = raw_line.strip().split('->')
            leaf = segments[-1]
            # Keep every non-empty segment that differs from the final one.
            chain = [seg.lstrip('\"') for seg in segments if seg != leaf and seg != '']

            root = chain[0]
            if root not in linchuang_medicine_node:
                graph.create(Node(' ', name=root))
                entity_num[' '] += 1
                linchuang_medicine_node.append(root)

            # Walk consecutive (parent, child) pairs down the chain.
            for parent_name, child_name in zip(chain, chain[1:]):
                if child_name in linchuang_medicine_node:
                    child = matcher.match(' ').where(name=child_name).first()
                else:
                    child = Node(' ', name=child_name)
                    entity_num[' '] += 1
                    graph.create(child)
                    linchuang_medicine_node.append(child_name)
                parent = matcher.match(' ').where(name=parent_name).first()
                graph.create(Relationship(parent, ' ', child))
                relation_num[' '] += 1

            chain.append(leaf)
            linchuang_medicine_level.append(chain)
    return linchuang_medicine_level, linchuang_medicine_node, graph
def get_level():
    """Reset state, load both hierarchies, dump the paths and print statistics.

    Calls ``set_config()`` (which wipes the database), then loads the
    xunyiwenyao files and the clinical-medicine file. Every xunyiwenyao path is
    written to a text file, one ``-->``-joined path per line, and the entity
    and relationship counters are printed.

    Returns:
        ``(xunyiwenyao_level, linchuang_medicine_level, graph)``.

    NOTE(review): the output path and the text in the printed headers read as
    blanks — the original non-ASCII text appears to have been lost in
    extraction. The literals are kept as found, apart from restoring the line
    breaks as ``\\n`` escapes.
    """
    set_config()
    xunyiwenyao_level, xunyiwenyao_node, graph = get_depaetment_paert_from_xunyiwenyao()
    linchuang_medicine_level,linchuang_medicine_node,graph = get_medicine_level()

    with open(' / .txt','w',encoding='utf-8') as f:
        for i in xunyiwenyao_level:
            f.write('-->'.join(i))
            f.write('\n')

    zong = sum(entity_num.values())
    print(' {} , :\n|\n:-:|:-:'.format(zong))
    for k,v in entity_num.items():
        print(k,'|',v)
    print('\n')

    zong = sum(relation_num.values())
    print(' {} , :\n|\n:-:|:-:'.format(zong))
    for k,v in relation_num.items():
        print(k,'|',v)
    print('\n')

    return xunyiwenyao_level, linchuang_medicine_level, graph
# Script entry point: load only the hierarchy data when run directly.
if __name__ =='__main__':
    get_level()
    # get_disease_symptom_level()