CakePHPでExcelファイルをパースしてDBにインポートする


作った理由は PhpOffice\PhpSpreadsheet を使ってみたかった。

仕様は

  • シート名がキャメルケースのテーブル名(TableRegistory::getで使う)
  • 1行目がテーブルカラム名
  • 空セルの場合は""に更新するようにする
  • 一旦Updateしか行わないようにする
  • コマンドは bin/cake Excel/Import /path/to/excel.xlsx

インストールは

composer require phpoffice/phpspreadsheet

コードは

<?php
namespace App\Shell\Excel;

use App\Shell\BaseShell;
use Cake\Log\Log;
use Cake\ORM\Entity;
use Cake\ORM\Table;
use Cake\ORM\TableRegistry;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;

/**
 * Execution example:
 *  - bin/cake Excel/Import /path/to/excel.xlsx
 */
class ImportShell extends BaseShell {

    /**
     * option configuration
     * @return \Cake\Console\ConsoleOptionParser
     */
    public function getOptionParser()
    {
        $parser = parent::getOptionParser();
        $parser->addArgument('file', ['help' => 'Please specify the Excel file.', 'required' => true]);
        return $parser;
    }

    /**
     * Update contents_master data with Contents Master API.
     */
    public function main()
    {
        try {
            $this->process();
        } catch (\Exception $e) {
            Log::error($e->getMessage());
        }
    }

    /**
     * @throws \Exception
     * @throws \PhpOffice\PhpSpreadsheet\Reader\Exception
     */
    private function process()
    {
        $filePath = $this->args[0];
        $this->import($filePath);
    }

    /**
     * @param string $filePath
     *
     * @throws \Exception
     * @throws \PhpOffice\PhpSpreadsheet\Reader\Exception
     */
    private function import($filePath)
    {
        if (!is_readable($filePath)) {
            throw new \Exception("This file is unreadable or not found.");
        }
        $reader = new Xlsx();
        $spreadsheet = $reader->load($filePath);
        $worksheetNames = $reader->listWorksheetNames($filePath);
        foreach($worksheetNames as $sheetName){
            $table = TableRegistry::get($sheetName);
            if (!$table) {
                throw new \Exception("Not found table class. " . $sheetName);
            }

            /** @var Worksheet $sheet */
            $sheet = $spreadsheet->getSheetByName($sheetName);
            $rows = $sheet->toArray();
            $rows = $this->convertToAssoc($rows);
            $this->save($table, $rows);
        }

    }

    /**
     * Convert to Associative array from simple array.
     * @param $rows
     *
     * @return array
     */
    private function convertToAssoc($rows)
    {
        $results = [];
        $headerCols = [];
        foreach ($rows as $i => $row) {
            if ($i === 0) {
                $headerCols = $row;
                continue;
            }
            if (!$headerCols) {
                continue;
            }

            foreach ($row as $col => $data){
                $name = $headerCols[$col];
                if($data === null) {
                    $data = "";
                }
                $results[$i][$name] = $data;
            }
        }
        return array_merge($results);// organize index.
    }

    /**
     * @param Table $table
     * @param array $data
     *
     * @throws \Exception
     */
    private function save(Table $table, $data)
    {
        if (!$data) {
            return;
        }
        $ids = array_column($data, 'id');
        $entities = $table->find()->where(['id in' => $ids])->select(array_keys($data[0]))->all();
        $entities = $table->patchEntities($entities, $data);
        foreach ($entities as $entity) {

            /** @var Entity $entity */
            if ($entity->isNew()) {
                continue;// Not to add the new one for now.
            }

            if (!$table->save($entity)) {
                $errorMsg = implode("\n", $entity->errors());
                throw new \Exception(sprintf("Failed to import for a row. errors: ", $errorMsg));
            }
        }
    }


}