Coding/Image
word image generator
linguana
2021. 7. 4. 18:40
# IBM Hangul image generator (reference script)
https://github.com/IBM/tensorflow-hangul-recognition/blob/master/tools/hangul-image-generator.py
# Word list for learners of Korean (Excel file) from the National Institute of Korean Language (국립국어원)
https://www.korean.go.kr/front/etcData/etcDataView.do?mn_id=46&etc_seq=71
import os
import io
import glob
import pandas as pd
from PIL import Image, ImageFont, ImageDraw
# Canvas size, in pixels, of every rendered word image (width x height).
IMAGE_WIDTH, IMAGE_HEIGHT = 300, 75

# Default locations used when this file is run as a script.
# NOTE(review): these look like Google Drive mount paths (Colab) -- adjust
# them when running elsewhere.
output_dir = "/content/drive/MyDrive/ICDAR2019_Korean/input"
fonts_dir = "/content/drive/MyDrive/ICDAR2019_Korean/fonts"
words_file = "/content/drive/MyDrive/ICDAR2019_Korean/korean_words.csv"
def generate_hangul_images(label_file, fonts_dir, output_dir):
    """Render every word of a CSV word list once per available TrueType font.

    For each word and each ``*.ttf`` font, a grayscale PNG of size
    ``IMAGE_WIDTH`` x ``IMAGE_HEIGHT`` is written to
    ``<output_dir>/hangul-images/<font name>/<word>.png`` and a
    ``<image path>,<word>`` line is appended to
    ``<output_dir>/labels-map.csv``.

    Args:
        label_file: Path of a CSV file readable by ``pandas.read_csv`` that
            has a column named ``단어`` holding the words to render.
        fonts_dir: Directory searched (non-recursively) for ``*.ttf`` fonts.
        output_dir: Directory that receives ``hangul-images/`` and
            ``labels-map.csv``; it is created if missing.

    Returns:
        None. Progress and per-image errors are printed to stdout.

    Raises:
        Whatever ``pandas.read_csv`` raises for an unreadable file,
        ``KeyError`` if the ``단어`` column is absent, and ``OSError`` if the
        output directory or the label map cannot be created.
    """
    # Read the list the caller asked for; the previous version ignored this
    # parameter and always read the module-level ``words_file``.
    words_csv = pd.read_csv(label_file)
    # Empty cells come back as NaN floats, which cannot be rendered.
    words_lst = words_csv[u'단어'].dropna().tolist()

    image_base_dir = os.path.join(output_dir, 'hangul-images')
    os.makedirs(image_base_dir, exist_ok=True)

    # Load each font exactly once instead of once per (word, font) pair.
    # Sorting makes the order of the label map reproducible across runs.
    fonts = []
    for font_path in sorted(glob.glob(os.path.join(fonts_dir, '*.ttf'))):
        # splitext/basename keep the whole stem of names such as "A.B.ttf"
        # and do not depend on '/' being the path separator.
        font_name = os.path.splitext(os.path.basename(font_path))[0]
        image_dir = os.path.join(image_base_dir, font_name)
        try:
            font = ImageFont.truetype(font_path, 48)
            os.makedirs(image_dir, exist_ok=True)
        except OSError as e:
            # Best effort: an unreadable font is reported and skipped.
            print(e)
            continue
        fonts.append((font, image_dir))

    total_count = 0
    prev_count = 0
    labels_path = os.path.join(output_dir, 'labels-map.csv')
    # The context manager closes the label map even on an unexpected error.
    with io.open(labels_path, 'w', encoding='utf-8') as labels_csv:
        for word in words_lst:
            # Print image count roughly every 5000 images.
            if total_count - prev_count > 5000:
                prev_count = total_count
                print('{} images generated...'.format(total_count))

            for font, image_dir in fonts:
                file_path = os.path.join(image_dir, '{}.png'.format(word))
                try:
                    _render_word_image(word, font).save(file_path, 'PNG')
                except Exception as e:
                    # Best effort, scoped to a single image so that one
                    # failure no longer skips the remaining fonts of a word.
                    print(e)
                    continue
                labels_csv.write(u'{},{}\n'.format(file_path, word))
                # Count only images that were actually written.
                total_count += 1

    print('Finished generating {} images.'.format(total_count))


def _render_word_image(word, font):
    """Return a white grayscale canvas with ``word`` drawn centered in black."""
    image = Image.new('L', (IMAGE_WIDTH, IMAGE_HEIGHT), color=(255))
    drawing = ImageDraw.Draw(image)
    if hasattr(drawing, 'textbbox'):
        # ``textsize`` was removed in Pillow 10. The right/bottom edges of
        # the bounding box anchored at the origin are intended to match the
        # extent ``textsize`` used to report, keeping the layout the same.
        _, _, w, h = drawing.textbbox((0, 0), word, font=font)
    else:
        # Older Pillow releases without ``textbbox``.
        w, h = drawing.textsize(word, font=font)
    drawing.text(
        ((IMAGE_WIDTH - w) / 2, (IMAGE_HEIGHT - h) / 2),
        word,
        fill=(0),
        font=font
    )
    return image
if __name__ == "__main__":
    # Script entry point: build the image set from the module-level defaults.
    generate_hangul_images(
        label_file=words_file,
        fonts_dir=fonts_dir,
        output_dir=output_dir,
    )
ibm_word_generator.ipynb
0.00MB