mnistデータセットをWEB上に持ち込む


概要

MNISTデータセットをPNG画像とJavaScriptのラベル配列に変換し、ブラウザのcanvas上で表示できるようにしてみた。

写真

取得

import _pickle as cPickle
import gzip
import os
import urllib.request

# Fetch (if necessary) and load the pickled MNIST archive that sits next to
# this script, then print the shape of every array it contains.
url_base = 'http://deeplearning.net/data/mnist/'
file_name = "mnist.pkl.gz"
dataset_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(dataset_dir, file_name)

# Download only when there is no local copy yet, so re-running the script does
# not hit the network again and a fresh checkout does not fail on a missing
# file.
if not os.path.exists(file_path):
    print("Downloading " + file_name + " ... ")
    # Download to a temporary name first so an interrupted transfer never
    # leaves a truncated archive under the final name.
    partial_path = file_path + ".part"
    urllib.request.urlretrieve(url_base + file_name, partial_path)
    os.replace(partial_path, file_path)
    print("Done")

# NOTE(review): unpickling runs arbitrary code embedded in the file; only load
# an archive obtained from a source you trust.
with gzip.open(file_path, 'rb') as f:
    # encoding='latin1' is kept from the original call; presumably the archive
    # was pickled under Python 2 -- TODO confirm.
    train_set, valid_set, test_set = cPickle.load(f, encoding='latin1')

# Each split is indexed as [0] and [1] below; the shapes printed show a
# 2-D array followed by a 1-D array of the same length.
for subset in (train_set, valid_set, test_set):
    print(subset[0].shape)
    print(subset[1].shape)
Downloading mnist.pkl.gz ... 
Done
(50000, 784)
(50000,)
(10000, 784)
(10000,)
(10000, 784)
(10000,)

加工

import _pickle as cPickle
import gzip
import numpy
from scipy.misc import imsave
import os

# Convert the pickled MNIST archive next to this script into 21 PNG "batch"
# images (3000 rows each, one row per sample) plus a JavaScript file that
# defines the matching `labels` array.
file_name = "mnist.pkl.gz"
dataset_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(dataset_dir, file_name)

# NOTE(review): unpickling runs arbitrary code embedded in the file; only load
# an archive obtained from a source you trust.
with gzip.open(file_path, 'rb') as f:
    train_set, valid_set, test_set = cPickle.load(f, encoding='latin1')

# Stack every training and validation row plus the first 3000 test rows.
# The * 255 scaling assumes pixel values are stored in [0, 1] -- TODO confirm.
x = numpy.concatenate((train_set[0] * 255, valid_set[0] * 255, test_set[0][:3000, :] * 255))

# NOTE(review): scipy.misc.imsave is deprecated/removed in recent SciPy
# releases -- confirm the installed SciPy version still provides it.
for i in range(20):
    imsave(os.path.join(dataset_dir, 'mnist_batch_' + str(i) + '.png'), x[3000 * i:3000 * (i + 1), :])
    print(i)
# Batch 20 holds whatever follows the first 60000 rows (the test rows above).
imsave(os.path.join(dataset_dir, 'mnist_batch_' + str(20) + '.png'), x[60000:, :])

# .tolist() converts the NumPy scalars to plain Python numbers. Without it,
# str(list(array)) renders elements as "np.int64(5)" on NumPy >= 2.0, which is
# not valid JavaScript.
# The label array covers the whole test split even though only its first 3000
# images are exported above; indices still line up for every exported row.
L = 'var labels=' + str(numpy.concatenate((train_set[1], valid_set[1], test_set[1])).tolist()) + ';\n'
with open(os.path.join(dataset_dir, 'mnist_labels.js'), 'w') as labels_file:
    labels_file.write(L)
print("Done!")

利用

// Batch index -> location of the PNG that load_data_batch() fetches for it.
var url = {
    0: '/assets/A/j/W/3/AjW3t.png',
    1: '/assets/e/l/w/h/elwhh.png',
    2: '/assets/Y/0/k/e/Y0kel.png',
    3: '/assets/8/O/x/F/8OxFx.png',
    4: '/assets/q/P/x/6/qPx60.png',
    5: '/assets/6/j/8/t/6j8tg.png',
    6: '/assets/S/H/y/u/SHyuZ.png',
    7: '/assets/q/Z/G/o/qZGoq.png',
    8: '/assets/o/D/k/E/oDkE9.png',
    9: '/assets/2/N/L/G/2NLGe.png',
    10: '/assets/Y/b/w/I/YbwIP.png',
    11: '/assets/w/e/S/9/weS9c.png',
    12: '/assets/K/9/6/V/K96Ve.png',
    13: '/assets/G/2/Y/x/G2Yxd.png',
    14: '/assets/K/X/8/d/KX8dt.png',
    15: '/assets/e/8/E/6/e8E6f.png',
    16: '/assets/S/N/1/Z/SN1Z2.png',
    17: '/assets/C/5/G/G/C5GGt.png',
    18: '/assets/s/8/n/V/s8nVW.png',
    19: '/assets/2/E/a/d/2EadT.png',
    20: '/assets/K/c/m/W/KcmWd.png'
};

// Per-batch bookkeeping, indexed by batch number.
var num_batches = 21;
var data_img_elts = new Array(num_batches);  // Image element of each batch
var img_data = new Array(num_batches);       // decoded ImageData of each batch
var loaded = new Array(num_batches);         // whether a batch finished loading
var loaded_train_batches = [];               // batch numbers below 20 that have loaded

// Visible canvas the digits and their labels are painted on.
var canvas = document.getElementById('canvas');
var ctx = canvas.getContext('2d');
ctx.font = 'bold 15pt Meiryo';
ctx.textAlign = 'center';
ctx.textBaseline = 'middle';
/**
 * Paint the first 120 samples of a loaded batch onto the main canvas as a
 * 12-column grid, each 28x28 digit followed by its label underneath.
 *
 * Reads the globals `img_data`, `labels` and `ctx`; `img_data[batch_num]`
 * must already hold the decoded batch image (one sample per image row,
 * 784 pixels wide).
 *
 * @param {number} batch_num Index of the batch to draw.
 */
function draw(batch_num) {
    // Each batch PNG holds 3000 samples, so a batch's labels start at this
    // offset in the global `labels` array. Using labels[i] alone showed the
    // labels of batch 0 for every batch.
    var label_offset = batch_num * 3000;
    // Hoisted: the pixel buffer is the same for every sample of the batch.
    var pixels = img_data[batch_num].data;
    for (var i = 0; i < 120; i++)
    {
        // The main context can allocate the ImageData directly; no throwaway
        // canvas per digit is needed.
        var g = ctx.createImageData(28, 28);
        for (var j = 0; j < 784; j++)
        {
            var pp = j * 4;
            // Sample i occupies row i of the batch image: 784 RGBA pixels.
            var t = i * 784 * 4 + pp;
            // Only the first channel of the source pixel is read; it is
            // copied into R, G and B to produce a gray pixel.
            var d = pixels[t];
            for (var k = 0; k < 3; k++)
            {
                g.data[pp + k] = d;
            }
            g.data[pp + 3] = 255; // fully opaque
        }
        // Grid cell: 30px wide, 50px tall (digit on top, label below).
        var x = (i % 12) * 30;
        var y = Math.floor(i / 12) * 50;
        var l = labels[label_offset + i];
        ctx.fillText(l, x + 10, y + 40);
        ctx.putImageData(g, x, y);
    }
}
/**
 * Start loading the PNG for one batch. Once it arrives, decode it into
 * `img_data[batch_num]`, mark the batch as loaded and draw it.
 *
 * @param {number} batch_num Index of the batch to fetch (key into `url`).
 */
function load_data_batch(batch_num) {
    var image = new Image();
    data_img_elts[batch_num] = image;

    image.onload = function () {
        // Paint the image onto an off-screen canvas of the same size so its
        // raw pixels can be read back.
        var scratch = document.createElement('canvas');
        var scratch_ctx = scratch.getContext('2d');
        scratch.width = image.width;
        scratch.height = image.height;
        scratch_ctx.drawImage(image, 0, 0);

        img_data[batch_num] = scratch_ctx.getImageData(0, 0, scratch.width, scratch.height);
        loaded[batch_num] = true;

        // Batches below 20 are additionally recorded as training batches.
        if (batch_num < 20) {
            loaded_train_batches.push(batch_num);
        }

        draw(batch_num);
    };

    // Assigning src last guarantees the handler is attached before loading starts.
    image.src = url[batch_num];
}
// Mark every batch as not yet loaded, then kick off loading of batch 0.
var k = 0;
while (k < loaded.length) {
    loaded[k] = false;
    k++;
}
load_data_batch(0);

成果物

以上。