#データ符号化と処理(cookbookノート)
9095 ワード
データ符号化と処理
csvファイルの読み書き
import csv
from collections import namedtuple
def main1(filename='stocks.csv'):
    """Print every data row of a CSV file as a list of strings.

    The first row is treated as the header and is skipped.

    Args:
        filename: Path of the CSV file to read. Defaults to 'stocks.csv'.
    """
    # newline='' leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields are passed through unchanged.
    with open(filename, newline='') as f:
        f_csv = csv.reader(f)
        # Giving next() a default keeps an empty file from raising
        # StopIteration; there is simply nothing to print.
        if next(f_csv, None) is None:
            return
        for row in f_csv:
            print(row)
def main2(filename='stocks.csv'):
    """Print every data row of a CSV file as a ``Row`` namedtuple.

    The header row supplies the field names, so each column title has to be
    a valid Python identifier (namedtuple raises ValueError otherwise).

    Args:
        filename: Path of the CSV file to read. Defaults to 'stocks.csv'.
    """
    # newline='' leaves newline handling to the csv module.
    with open(filename, newline='') as f:
        f_csv = csv.reader(f)
        headings = next(f_csv, None)
        if headings is None:
            # Empty file: no header, hence no rows to report.
            return
        Row = namedtuple('Row', headings)
        for r in f_csv:
            print(Row(*r))
def main3(filename='stocks.csv'):
    """Print every data row of a CSV file as a mapping keyed by column name.

    Args:
        filename: Path of the CSV file to read. Defaults to 'stocks.csv'.
    """
    # newline='' leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields are passed through unchanged.
    with open(filename, newline='') as f:
        for r in csv.DictReader(f):
            print(r)
def main5(filename='stocks.csv'):
    """Print CSV rows as namedtuples, sanitizing the column titles first.

    Every character of a header that is not an ASCII letter is replaced by
    an underscore. Titles that are still unusable as field names afterwards
    (leading underscore, duplicates, keywords) are replaced by positional
    names such as ``_2`` instead of raising ValueError.

    Args:
        filename: Path of the CSV file to read. Defaults to 'stocks.csv'.
    """
    import re

    # newline='' leaves newline handling to the csv module.
    with open(filename, newline='') as f:
        f_csv = csv.reader(f)
        raw_headers = next(f_csv, None)
        if raw_headers is None:
            # Empty file: nothing to print.
            return
        headers = [re.sub(r'[^a-zA-Z]', '_', h) for h in raw_headers]
        # rename=True covers titles the substitution cannot rescue, e.g.
        # '2nd' -> '_nd', which namedtuple rejects for its leading underscore.
        Row = namedtuple('Row', headers, rename=True)
        for r in f_csv:
            print(Row(*r))
def write_1(filename='stocks.csv'):
    """Write the sample stock table to a CSV file from a list of tuples.

    Args:
        filename: Path of the CSV file to create or overwrite.
            Defaults to 'stocks.csv'.
    """
    headers = ['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
    rows = [
        ('AA', 39.48, '6/11/2007', '9:36am', -0.18, 181800),
        ('AIG', 71.38, '6/11/2007', '9:36am', -0.15, 195500),
        ('AXP', 62.58, '6/11/2007', '9:36am', -0.46, 935000),
    ]
    # The csv writer emits '\r\n' itself; newline='' stops the text layer
    # from translating it again (which yields '\r\r\n' on Windows).
    with open(filename, 'w', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(rows)
def write_2(filename='stocks.csv'):
    """Write the sample stock table to a CSV file from a list of dicts.

    Args:
        filename: Path of the CSV file to create or overwrite.
            Defaults to 'stocks.csv'.
    """
    headers = ['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
    rows = [
        {'Symbol': 'AA', 'Price': 39.48, 'Date': '6/11/2007',
         'Time': '9:36am', 'Change': -0.18, 'Volume': 181800},
        {'Symbol': 'AIG', 'Price': 71.38, 'Date': '6/11/2007',
         'Time': '9:36am', 'Change': -0.15, 'Volume': 195500},
        {'Symbol': 'AXP', 'Price': 62.58, 'Date': '6/11/2007',
         'Time': '9:36am', 'Change': -0.46, 'Volume': 935000},
    ]
    # The csv writer emits '\r\n' itself; newline='' stops the text layer
    # from translating it again (which yields '\r\r\n' on Windows).
    with open(filename, 'w', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()
        f_csv.writerows(rows)
def main4(filename='stocks.csv'):
    """Print every row of a tab-delimited file as a list of strings.

    Unlike the comma-separated readers, no header row is skipped here.

    Args:
        filename: Path of the tab-separated file to read.
            Defaults to 'stocks.csv'.
    """
    # newline='' leaves newline handling to the csv module.
    with open(filename, newline='') as f:
        # Fields are separated by tabs instead of commas.
        for row in csv.reader(f, delimiter='\t'):
            print(row)
def convert_1(filename='stocks.csv'):
    """Print CSV rows as tuples with each column converted to its type.

    Columns are converted positionally with ``str, float, str, str, float,
    int``; the header row is skipped. A value that cannot be converted
    raises ValueError from the corresponding converter.

    Args:
        filename: Path of the CSV file to read. Defaults to 'stocks.csv'.
    """
    col_types = [str, float, str, str, float, int]
    # newline='' leaves newline handling to the csv module.
    with open(filename, newline='') as f:
        f_csv = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        if next(f_csv, None) is None:
            return
        for row in f_csv:
            converted = tuple(
                convert(value) for convert, value in zip(col_types, row)
            )
            print(converted)
def convert_2(filename='stocks.csv'):
    """Print CSV rows as mappings with selected columns converted.

    'Price' and 'Change' become floats and 'Volume' becomes an int; every
    other column stays a string. A row lacking one of those columns raises
    KeyError.

    Args:
        filename: Path of the CSV file to read. Defaults to 'stocks.csv'.
    """
    field_types = [
        ('Price', float),
        ('Change', float),
        ('Volume', int),
    ]
    # newline='' leaves newline handling to the csv module.
    with open(filename, newline='') as f:
        for row in csv.DictReader(f):
            # Overwrite only the listed fields with their converted values.
            row.update(
                (key, conversion(row[key])) for key, conversion in field_types
            )
            print(row)
JSONデータの読み書き
>>> import json
>>> s = '{"name": "ACME", "shares": 50, "price": 490.1}'
>>> from collections import OrderedDict
>>> json.loads(s, object_pairs_hook=OrderedDict)
OrderedDict([('name', 'ACME'), ('shares', 50), ('price', 490.1)])
>>> class JSONObject:
... def __init__(self, d):
... self.__dict__ = d
>>> data = json.loads(s, object_hook=JSONObject)
>>> data.name
'ACME'
>>>
>>> print (json.dumps({'a':1}, indent=4))
{
    "a": 1
}
import json
class Point:
    """Plain 2-D point that stores ``x`` and ``y`` as instance attributes."""

    def __init__(self, x, y):
        self.x, self.y = x, y
def serialize_instance(obj):
    """Return a dict describing *obj* for use as a ``json.dumps`` default hook.

    The result holds the class name under '__classname__' followed by the
    instance's own attributes as returned by ``vars(obj)``.
    """
    return {'__classname__': type(obj).__name__, **vars(obj)}
if __name__ == '__main__':
    # Demo: a Point is not JSON-serializable by itself, so json.dumps falls
    # back to serialize_instance to turn it into a plain dict.
    p = Point(2, 3)
    s = json.dumps(p, default=serialize_instance)
    print(s)
解析が簡単なxml
>>> from urllib.request import urlopen
>>> from xml.etree.ElementTree import parse
>>> u = urlopen('http://planet.python.org/rss20.xml')
>>> doc = parse(u)
>>> doc
<xml.etree.ElementTree.ElementTree object at 0x...>
>>> e = doc.find('channel/title')
>>> e.tag
'title'
>>> e.text
'Planet Python'
辞書をxmlに変換
from xml.etree.ElementTree import Element, tostring
def dict_to_xml(tag, d):
    """Build an XML element named *tag* from a flat dictionary.

    Each key of *d* becomes a child element whose text is ``str(value)``;
    children are appended in the dictionary's iteration order.
    """
    def _leaf(name, value):
        # One child element per key/value pair.
        node = Element(name)
        node.text = str(value)
        return node

    root = Element(tag)
    for name, value in d.items():
        root.append(_leaf(name, value))
    return root
if __name__ == '__main__':
    # Demo: convert a stock record to an element, attach an attribute to
    # the root, and print the serialized bytes.
    s = {'name': 'GOOG', 'shares': 100, 'price': 490.1}
    e = dict_to_xml('stock', s)
    e.set('_id', '1234')
    print(tostring(e))
# Output:
# b'<stock _id="1234"><name>GOOG</name><shares>100</shares><price>490.1</price></stock>'
def dict_to_xml_str(tag, d):
    """Build an XML string for a flat dictionary by plain string formatting.

    Args:
        tag: Name of the enclosing element.
        d: Mapping of child element names to values; each value is
            converted with ``str``.

    Returns:
        The text ``<tag><key>value</key>...</tag>`` with children in the
        dictionary's iteration order.
    """
    from xml.sax.saxutils import escape

    parts = ['<{}>'.format(tag)]
    for key, val in d.items():
        # Escape &, < and > so a value such as '<spam>' cannot break the
        # markup that is assembled by hand here.
        parts.append('<{0}>{1}</{0}>'.format(key, escape(str(val))))
    parts.append('</{}>'.format(tag))
    return ''.join(parts)


if __name__ == '__main__':
    s = {'name': 'GOOG', 'shares': 100, 'price': 490.1}
    e = dict_to_xml_str('stock', s)
    print(e)
    # Output:
    # <stock><name>GOOG</name><shares>100</shares><price>490.1</price></stock>
16進数の符号化と復号化
>>> s = b'hello'
>>> import binascii
>>> h = binascii.b2a_hex(s)
>>> h
b'68656c6c6f'
>>> binascii.a2b_hex(h)
b'hello'
>>> import base64
>>> h = base64.b16encode(s)
>>> h
b'68656C6C6F'
>>> base64.b16decode(h)
b'hello'
# Unicode
>>> h = h.decode('ascii')
>>> h
'68656C6C6F'
Encode(符号化)decode(復号)base 64
>>> import base64
>>> s = b'hello'
>>> a = base64.b64encode(s)
>>> a
b'aGVsbG8='
>>> base64.b64decode(a)
b'hello'
# unicode
>>> base64.b64decode(a).decode('ascii')
'hello'
読み書きバイナリ配列データ
from struct import Struct
def write_records(records, format, f):
    """Pack each record with the struct layout *format* and write it to *f*.

    Args:
        records: Iterable of tuples whose items match *format*.
        format: ``struct`` format string describing one record.
        f: Object with a ``write`` method accepting bytes.
    """
    # Compile the layout once and reuse its bound pack method per record.
    pack = Struct(format).pack
    for fields in records:
        f.write(pack(*fields))
if __name__ == '__main__':
    # Demo: write three records to a binary file.
    records = [
        (1, 2.3, 4.5),
        (6, 7.8, 9.0),
        (12, 13.4, 56.7),
    ]
    with open('data.b', 'wb') as f:
        # '<idd' = little-endian int, double, double (20 bytes per record).
        # NOTE(review): the call was truncated in these notes; the format is
        # reconstructed from the "int double double" / "20" remarks - confirm.
        write_records(records, '<idd', f)
def read_records(format, f):
    """Lazily read fixed-size records from the binary file object *f*.

    Args:
        format: ``struct`` format string describing one record.
        f: Object whose ``read(n)`` returns bytes, and ``b''`` at the end.

    Returns:
        A generator yielding one unpacked tuple per record.
    """
    layout = Struct(format)

    def _records():
        # Read one record-sized chunk at a time until read() reports b''.
        while True:
            chunk = f.read(layout.size)
            if chunk == b'':
                return
            yield layout.unpack(chunk)

    return _records()
if __name__ == '__main__':
    # Demo: stream the records back from the binary file one at a time.
    with open('data.b', 'rb') as f:
        # NOTE(review): this statement was truncated in these notes; the
        # '<idd' format and the loop body are reconstructed - confirm.
        for r in read_records('<idd', f):
            print(r)
def unpack_records(format, data):
    """Lazily unpack consecutive fixed-size records from a bytes buffer.

    Args:
        format: ``struct`` format string describing one record.
        data: Buffer holding the records back to back.

    Returns:
        A generator yielding one unpacked tuple per record-sized offset.
    """
    layout = Struct(format)
    # One start offset per record; unpack_from reads in place, no slicing.
    starts = range(0, len(data), layout.size)
    return (layout.unpack_from(data, start) for start in starts)
if __name__ == '__main__':
#read
with open('data.b', 'rb') as f:
data = f.read()
for rec in unpack_records('