PythonでElasticsearch(ES)の全データを読み出し、MD5を計算して永続化する
1727 ワード
#!/usr/bin/python
import threading
import json
import time
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import os
import sys
import argparse
# Elasticsearch nodes the client may connect to, all addressed on port 9200.
host_list = [
    {"host": address, "port": 9200}
    for address in ("1.58.55.11", "1.58.55.12", "1.58.55.13")
]
# Client used for every search/scroll request in this script.
es = Elasticsearch(host_list)
# Number of hits requested per scroll page.
size = 1000
# Open a scroll context (kept alive for one minute) on the index; the search
# response itself already contains the first page of hits.
query = es.search(index='full_sight', scroll='1m', size=size)
results = query['hits']['hits']  # first page of documents
total = query['hits']['total']  # number of matching documents
scroll_id = query['_scroll_id']  # cursor passed to subsequent es.scroll() calls
# Elasticsearch 7+ reports hits.total as {"value": N, "relation": ...} instead
# of a bare integer; unwrap it so the arithmetic below works on both shapes.
if isinstance(total, dict):
    total = total['value']
# Number of pages needed to cover `total` hits, rounding a partial page up.
page, leftover = divmod(total, size)
if leftover:
    page += 1
import hashlib


def _document_md5(hit):
    """Return the hex MD5 digest of one search hit serialized as UTF-8 JSON."""
    # A fresh hash object per document keeps every digest independent of the
    # documents processed before it.
    return hashlib.md5(json.dumps(hit).encode("utf-8")).hexdigest()


# 1-based running count of processed documents, shown in the progress output.
num = 1
# Start with the page returned by the initial search so that those documents
# are not skipped, then keep scrolling until a page comes back empty.
hits = results
# Open the output once (append mode, one JSON object per line) instead of
# reopening it for every document.
with open("test.text", "a") as f:
    while hits:
        for m in hits:
            k = m["_id"]
            f.write(json.dumps({k: _document_md5(m)}) + "\n")
            print(k, num, sep="============>")
            num += 1
        response = es.scroll(scroll_id=scroll_id, scroll='1m')
        # Always continue from the most recent scroll id returned by the server.
        scroll_id = response['_scroll_id']
        hits = response['hits']['hits']