word image generator

Coding/Image

word image generator

linguana 2021. 7. 4. 18:40

# IBM text generator

https://github.com/IBM/tensorflow-hangul-recognition/blob/master/tools/hangul-image-generator.py

# 국립국어원 학습자용 단어 목록 엑셀파일

https://www.korean.go.kr/front/etcData/etcDataView.do?mn_id=46&etc_seq=71

import os
import io
import glob
import pandas as pd
from PIL import Image, ImageFont, ImageDraw

# Pixel dimensions of every generated word image; passed to Image.new as
# (IMAGE_WIDTH, IMAGE_HEIGHT) and used to centre the rendered text.
IMAGE_WIDTH = 300
IMAGE_HEIGHT = 75

# Default locations used when this script is run directly.
# NOTE(review): the /content/drive/MyDrive prefix looks like a Google Drive
# mount inside Colab -- confirm before running elsewhere.
# CSV file that must contain a '단어' column holding the words to render.
words_file = "/content/drive/MyDrive/ICDAR2019_Korean/korean_words.csv"
# Directory searched (non-recursively) for *.ttf font files.
fonts_dir = "/content/drive/MyDrive/ICDAR2019_Korean/fonts"
# Directory that receives 'labels-map.csv' and the 'hangul-images' tree.
output_dir = "/content/drive/MyDrive/ICDAR2019_Korean/input"


def _measure_text(drawing, text, font):
    """Return the (width, height) used to centre *text* drawn with *font*."""
    if hasattr(drawing, 'textbbox'):
        # ImageDraw.textsize was removed in Pillow 10. The right/bottom edge
        # of the bounding box measured from the origin is used (rather than
        # right-left / bottom-top) so the text lands where the textsize-based
        # centring used to put it.
        _, _, right, bottom = drawing.textbbox((0, 0), text, font=font)
        return right, bottom
    # Older Pillow releases without textbbox.
    return drawing.textsize(text, font=font)


def _load_fonts(fonts_dir, size=48):
    """Load each *.ttf in *fonts_dir* once; return (font_name, font) pairs."""
    loaded = []
    for font_path in sorted(glob.glob(os.path.join(fonts_dir, '*.ttf'))):
        font_name = os.path.splitext(os.path.basename(font_path))[0]
        try:
            loaded.append((font_name, ImageFont.truetype(font_path, size)))
        except OSError as e:
            # An unreadable font should not abort the whole run.
            print('Skipping font {}: {}'.format(font_path, e))
    return loaded


def generate_hangul_images(label_file, fonts_dir, output_dir):
    """Render every word of a CSV word list once per font as a PNG image.

    Images are written to ``<output_dir>/hangul-images/<font name>/<word>.png``
    and a ``<output_dir>/labels-map.csv`` file is written with one
    ``image_path,word`` line per image that was saved.

    Args:
        label_file: Path of a CSV file with a '단어' column holding the words.
        fonts_dir: Directory containing the ``*.ttf`` fonts to render with.
        output_dir: Directory that receives the images and the label map.

    Returns:
        None.

    Raises:
        Whatever ``pandas.read_csv`` raises when *label_file* cannot be read,
        and ``KeyError`` when it has no '단어' column. Failures while drawing
        or saving a single image are reported with ``print`` and skipped.
    """
    # Read the list that was actually passed in (not a module-level default)
    # and drop empty cells, which pandas would otherwise hand over as NaN.
    words_csv = pd.read_csv(label_file)
    words_lst = words_csv[u'단어'].dropna().astype(str).tolist()

    image_base_dir = os.path.join(output_dir, 'hangul-images')
    os.makedirs(image_base_dir, exist_ok=True)

    # Load every font once instead of once per word, and prepare its
    # output directory up front.
    fonts = []
    for font_name, font in _load_fonts(fonts_dir):
        image_dir = os.path.join(image_base_dir, font_name)
        os.makedirs(image_dir, exist_ok=True)
        fonts.append((image_dir, font))
    if not fonts:
        print('No .ttf fonts found in {}'.format(fonts_dir))

    total_count = 0
    prev_count = 0
    labels_path = os.path.join(output_dir, 'labels-map.csv')
    # The context manager guarantees the label map is flushed and closed
    # even if something unexpected propagates out of the loop.
    with io.open(labels_path, 'w', encoding='utf-8') as labels_csv:
        for word in words_lst:
            # Print image count roughly every 5000 images.
            if total_count - prev_count > 5000:
                prev_count = total_count
                print('{} images generated...'.format(total_count))

            for image_dir, font in fonts:
                file_path = os.path.join(image_dir, '{}.png'.format(word))
                try:
                    image = Image.new(
                        'L', (IMAGE_WIDTH, IMAGE_HEIGHT), color=255)
                    drawing = ImageDraw.Draw(image)
                    w, h = _measure_text(drawing, word, font)
                    drawing.text(
                        ((IMAGE_WIDTH - w) / 2, (IMAGE_HEIGHT - h) / 2),
                        word,
                        fill=0,
                        font=font
                    )
                    image.save(file_path, 'PNG')
                except Exception as e:
                    # Best-effort batch job: report the failing word/font and
                    # carry on with the remaining fonts and words.
                    print('Skipping {!r} ({}): {}'.format(word, image_dir, e))
                    continue

                # Only images that were really written are labelled/counted.
                labels_csv.write(u'{},{}\n'.format(file_path, word))
                total_count += 1

    print('Finished generating {} images.'.format(total_count))

# Script entry point: generate the images from the module-level default paths.
if __name__ == "__main__":
    generate_hangul_images(
        label_file=words_file,
        fonts_dir=fonts_dir,
        output_dir=output_dir,
    )

ibm_word_generator.ipynb

0.00MB

저작자표시 비영리 변경금지 (새창열림)