Google Apps Script で PDF をテキスト化する


はじめに

GAS で PDF の内容を読み取りたい!と思った時に書いたコードの記録です。Drive の OCR 変換を利用するため、事前に Apps Script の拡張サービスで Drive API を有効にしておく必要があります。

コード

/**
 * Extracts the text content of a Drive file (typically a PDF) by uploading
 * its blob through `Drive.Files.insert` with OCR enabled and reading the
 * resulting Google Document.
 *
 * NOTE(review): `Drive.Files.insert` with a `title` field looks like the
 * Drive advanced service v2 — confirm the service version enabled for the
 * project.
 */
export class PDFService {

    /**
     * Returns the text of the file identified by `fileId`.
     *
     * The document created by the OCR insert is not removed by this method,
     * so every call leaves one converted document behind in Drive.
     *
     * @param fileId   Drive file id of the source file.
     * @param language Value passed as `ocrLanguage` to the insert call.
     *                 Defaults to `'ja'`.
     * @returns The body text of the converted document.
     * @throws Error when the OCR insert fails or returns no file id.
     */
    toStringByFileId(fileId: string, language = 'ja'): string {
        const blob = DriveApp.getFileById(fileId).getBlob();
        // Metadata for the insert request: keep the source name and content type.
        const resource: any = {
            title: blob.getName(),
            mimeType: blob.getContentType(),
        };
        return this.toDoc(resource, blob, language).getBody().getText();
    }

    /**
     * Inserts `blob` into Drive with OCR enabled and opens the resulting document.
     *
     * Failures are logged and rethrown with context instead of being
     * swallowed, so the caller never receives `undefined`.
     *
     * @throws Error when the insert call throws or the inserted file has no id.
     */
    private toDoc(resource: any, blob: GoogleAppsScript.Base.Blob, language: string): GoogleAppsScript.Document.Document {
        const options = {ocr: true, ocrLanguage: language};
        let file: GoogleAppsScript.Drive.Schema.File;
        try {
            file = Drive.Files.insert(resource, blob, options);
        } catch (e: unknown) {
            // Keep the original error (and its stack) in the execution log.
            console.error(e);
            const reason = e instanceof Error ? e.message : String(e);
            throw new Error(`OCR conversion failed for "${resource.title}": ${reason}`);
        }
        if (!file.id) {
            throw new Error(`OCR conversion of "${resource.title}" returned a file without an id`);
        }
        return DocumentApp.openById(file.id);
    }
}