TensorFlowの公式チュートリアルを参考にしています。

スペイン語から英語への翻訳を行うモデルを構築します。

In [1]:
%pip install tensorflow_text
%pip install einops
%pip install matplotlib
Requirement already satisfied: tensorflow_text in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (2.16.1)
Requirement already satisfied: tensorflow<2.17,>=2.16.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow_text) (2.16.1)
Requirement already satisfied: absl-py>=1.0.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (2.1.0)
Requirement already satisfied: astunparse>=1.6.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (1.6.3)
Requirement already satisfied: flatbuffers>=23.5.26 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (24.3.25)
Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (0.5.4)
Requirement already satisfied: google-pasta>=0.1.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (0.2.0)
Requirement already satisfied: h5py>=3.10.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (3.10.0)
Requirement already satisfied: libclang>=13.0.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (18.1.1)
Requirement already satisfied: ml-dtypes~=0.3.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (0.3.2)
Requirement already satisfied: opt-einsum>=2.3.2 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (3.3.0)
Requirement already satisfied: packaging in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (24.0)
Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (4.25.3)
Requirement already satisfied: requests<3,>=2.21.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (2.31.0)
Requirement already satisfied: setuptools in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (59.6.0)
Requirement already satisfied: six>=1.12.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (1.16.0)
Requirement already satisfied: termcolor>=1.1.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (2.4.0)
Requirement already satisfied: typing-extensions>=3.6.6 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (4.10.0)
Requirement already satisfied: wrapt>=1.11.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (1.16.0)
Requirement already satisfied: grpcio<2.0,>=1.24.3 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (1.62.1)
Requirement already satisfied: tensorboard<2.17,>=2.16 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (2.16.2)
Requirement already satisfied: keras>=3.0.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (3.1.1)
Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (0.36.0)
Requirement already satisfied: numpy<2.0.0,>=1.23.5 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorflow<2.17,>=2.16.1->tensorflow_text) (1.26.4)
Requirement already satisfied: wheel<1.0,>=0.23.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from astunparse>=1.6.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (0.43.0)
Requirement already satisfied: rich in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from keras>=3.0.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (13.7.1)
Requirement already satisfied: namex in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from keras>=3.0.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (0.0.7)
Requirement already satisfied: optree in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from keras>=3.0.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (0.11.0)
Requirement already satisfied: charset-normalizer<4,>=2 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (3.6)
Requirement already satisfied: urllib3<3,>=1.21.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (2.2.1)
Requirement already satisfied: certifi>=2017.4.17 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (2024.2.2)
Requirement already satisfied: markdown>=2.6.8 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorboard<2.17,>=2.16->tensorflow<2.17,>=2.16.1->tensorflow_text) (3.6)
Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorboard<2.17,>=2.16->tensorflow<2.17,>=2.16.1->tensorflow_text) (0.7.2)
Requirement already satisfied: werkzeug>=1.0.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from tensorboard<2.17,>=2.16->tensorflow<2.17,>=2.16.1->tensorflow_text) (3.0.1)
Requirement already satisfied: MarkupSafe>=2.1.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard<2.17,>=2.16->tensorflow<2.17,>=2.16.1->tensorflow_text) (2.1.5)
Requirement already satisfied: markdown-it-py>=2.2.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from rich->keras>=3.0.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (3.0.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from rich->keras>=3.0.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (2.17.2)
Requirement already satisfied: mdurl~=0.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->keras>=3.0.0->tensorflow<2.17,>=2.16.1->tensorflow_text) (0.1.2)
Note: you may need to restart the kernel to use updated packages.
Requirement already satisfied: einops in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (0.7.0)
Note: you may need to restart the kernel to use updated packages.
Requirement already satisfied: matplotlib in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (3.8.3)
Requirement already satisfied: contourpy>=1.0.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (4.50.0)
Requirement already satisfied: kiwisolver>=1.3.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (1.4.5)
Requirement already satisfied: numpy<2,>=1.21 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (1.26.4)
Requirement already satisfied: packaging>=20.0 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (24.0)
Requirement already satisfied: pillow>=8 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (10.2.0)
Requirement already satisfied: pyparsing>=2.3.1 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from matplotlib) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in /home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [2]:
import numpy as np

import typing
from typing import Any, Tuple

import tensorflow as tf

import tensorflow_text as tf_text

import einops
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from tensorflow.python.client import device_lib

import pathlib
2024-03-27 22:51:22.925157: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-27 22:51:22.948845: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-27 22:51:23.318399: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
In [3]:
print(tf.__version__)

display(device_lib.list_local_devices())
display(tf.config.list_physical_devices("GPU"))
2.16.1
2024-03-27 22:51:23.731317: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-27 22:51:23.746485: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 10792337292684705361
 xla_global_id: -1]
2024-03-27 22:51:23.818930: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-27 22:51:23.818961: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
[]

ShapeChecker クラスの説明

このチュートリアルでは、低レベルのAPIを多用するため、テンソルの形状を間違えやすくなっています。ShapeChecker クラスは、チュートリアル全体を通してテンソルの形状を確認するために使用されます。

機能:

  • 形状の確認:与えられたテンソルの形状が期待される形状と一致するかをチェックします。
  • キャッシュの利用:一度見た変数の形状をキャッシュし、後続のチェックで利用します。
  • ブロードキャストの考慮:ブロードキャスト(形状の自動調整)が行われる場合、それを考慮に入れます。

メソッド:

  • __init__: クラスの初期化メソッド。軸名とその形状のキャッシュを保持するための辞書を作成します。
  • __call__: クラスの呼び出しメソッド。指定されたテンソルと軸名を受け取り、それらの形状をチェックします。

使用方法:

  1. テンソルと、そのテンソルの各軸に名前を割り当てます(例:"batch time")。
  2. このクラスを呼び出し、テンソルと軸名を渡します。
  3. テンソルの形状が以前に保存された形状と一致しない場合は、ValueError を発生させます。

このクラスは、形状の不一致によるエラーを早期に発見するのに役立ちます。

In [4]:
class ShapeChecker():
    def __init__(self):
        # Keep a cache of every axis-name seen
        self.shapes = {}
    
    def __call__(self, tensor: tf.Tensor, names: str, broadcast: bool = False):
        if not tf.executing_eagerly():
            return
        
        parsed = einops.parse_shape(tensor, names)

        for name, new_dim in parsed.items():
            old_dim = self.shapes.get(name, None)

            if (broadcast and new_dim == 1):
                continue

            if old_dim is None:
                # If the axis name is new, add its length to the cache
                self.shapes[name] = new_dim
                continue

            if new_dim != old_dim:
                raise ValueError(f"Shape mismatch for dimension: '{name}'\n"
                                 f"    found: {new_dim}"
                                 f"    expected: {old_dim}")

データセットのダウンロード

In [5]:
# Download the file
path_to_zip = tf.keras.utils.get_file(
    'spa-eng.zip', origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip',
    extract=True)

path_to_file = pathlib.Path(path_to_zip).parent / "spa-eng/spa.txt"
In [6]:
def load_data(path: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    指定したパスからデータを読み込みます。

    Args:
        path: データが保存されているファイルのパス。
    
    Returns:
        Tuple[np.ndarray, np.ndarray]: 入力データと出力データのペア。
    """
    text = path.read_text(encoding='utf-8')

    lines = text.splitlines()
    pairs = [line.split('\t') for line in lines]

    # ペアは(英語の文章、スペイン語の文章)という形式になっているらしい。
    # ターゲットが左側にあることに違和感がある。
    context = np.array([context for target, context in pairs])
    target = np.array([target for target, context in pairs])

    return target, context
In [7]:
target_raw, context_raw = load_data(path_to_file)
print(context_raw[-1])
print(target_raw[-1])
Si quieres sonar como un hablante nativo, debes estar dispuesto a practicar diciendo la misma frase una y otra vez de la misma manera en que un músico de banjo practica el mismo fraseo una y otra vez hasta que lo puedan tocar correctamente y en el tiempo esperado.
If you want to sound like a native speaker, you must be willing to practice saying the same sentence over and over in the same way that banjo players practice the same phrase over and over until they can play it correctly and at the desired tempo.

tf.dataデータセットを作成

In [8]:
BUFFER_SIZE = len(context_raw)
BATCH_SIZE = 64

is_train = np.random.uniform(size=(len(target_raw),)) < 0.8

train_raw = (
    tf.data.Dataset
    .from_tensor_slices((context_raw[is_train], target_raw[is_train]))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE))
val_raw = (
    tf.data.Dataset
    .from_tensor_slices((context_raw[~is_train], target_raw[~is_train]))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE))
In [9]:
for example_context_strings, example_target_strings in train_raw.take(1):
    print(example_context_strings[:5])
    print()
    print(example_target_strings[:5])
    break
tf.Tensor(
[b'Soy racional.' b'En este almac\xc3\xa9n venden muy barato.'
 b'No me voy a callar.' b'Tengo justo lo que necesitas.'
 b'\xc3\x89l tuvo un accidente de tr\xc3\xa1fico de camino al colegio.'], shape=(5,), dtype=string)

tf.Tensor(
[b"I'm rational." b'They sell things very cheap in this store.'
 b"I won't stay silent." b'I have just what you need.'
 b'He had a traffic accident on his way to school.'], shape=(5,), dtype=string)

NFKD正規化

参考記事

In [10]:
example_text = tf.constant('¿Todavía está en casa?')

print(example_text.numpy())
print(example_text.numpy().decode())
print(tf_text.normalize_utf8(example_text, 'NFKD').numpy())
print(tf_text.normalize_utf8(example_text, 'NFKD').numpy().decode())
b'\xc2\xbfTodav\xc3\xada est\xc3\xa1 en casa?'
¿Todavía está en casa?
b'\xc2\xbfTodavi\xcc\x81a esta\xcc\x81 en casa?'
¿Todavía está en casa?

テキストの前処理を行う関数

In [11]:
def tf_lower_and_split_punct(text: tf.Tensor) -> tf.Tensor:
    """
    与えられたテキストの前処理を行います。

    この関数は以下の処理を行います:
    1. アクセント付きの文字を分割するために、NFKD正規化を使用します。
    2. テキストを全て小文字に変換します。
    3. スペース、アルファベット、一部の句読点 ( . , ! ? ¿ ) を除き、他の全ての文字を削除します。
    4. 句読点 ( . ? ! , ¿ ) の前後にスペースを挿入します。
    5. 連続するスペースを一つにまとめます。
    6. テキストの前後に '[START]' と '[END]' トークンを追加します。

    Args:
        text (tf.Tensor): 前処理を行うテキストが格納されたテンソル。
    
    Returns:
        tf.Tensor: 前処理が行われたテキストが格納されたテンソル。
    """
    # Split accented characters.
    text = tf_text.normalize_utf8(text, 'NFKD')
    text = tf.strings.lower(text)
    # Keep space, a to z, and select punctuation.
    text = tf.strings.regex_replace(text, '[^ a-z.?!,¿]', '')
    # Add spaces around punctuation.
    text = tf.strings.regex_replace(text, '[.?!,¿]', r' \0 ')
    # Strip white space.
    text = tf.strings.strip(text)

    text = tf.strings.join(['[START]', text, '[END]'], separator=' ')
    return text
In [12]:
print(example_text.numpy().decode())
print(tf_lower_and_split_punct(example_text).numpy().decode())
¿Todavía está en casa?
[START] ¿ todavia esta en casa ? [END]

テキストのトークン化

このコードは、TensorFlowの tf.keras.layers.TextVectorization レイヤーを使用して、テキストデータの前処理とトークン化を行うための設定をしています。具体的には以下の処理を行います:

  1. テキストの標準化 (Standardization):

    • tf_lower_and_split_punct 関数を使用して、テキストデータを標準化します。
  2. 語彙の抽出とトークン化 (Vocabulary Extraction and Tokenization):

    • このレイヤーは入力テキストから語彙(単語のセット)を抽出し、各単語をトークン(一意の数値ID)に変換します。これにより、テキストデータはニューラルネットワークが処理できる数値のシーケンスに変換されます。
  3. 最大語彙サイズの設定 (Setting the Maximum Vocabulary Size):

    • max_vocab_size = 5000 は、語彙に含める単語の最大数を 5000 に制限することを意味します。これにより、トレーニングの計算コストを抑えつつ、出現頻度が高い単語をカバーすることができます。
  4. ラギッドテンソル (Ragged Tensor):

    • ragged=True は、出力をラギッドテンソル(不規則な長さのテンソル)の形式で生成することを意味します。テキストデータは通常、異なる長さを持つため、このオプションにより各サンプルの長さを個別に扱うことができます。

TextVectorization レイヤーは、ニューラルネットワークが処理できる形式にテキストデータを自動的に変換するための強力なツールです。これにより、テキストデータの前処理とトークン化のプロセスが簡素化され、機械学習モデルのトレーニングが容易になります。

In [13]:
max_vocab_size = 5000

context_text_processor = tf.keras.layers.TextVectorization(
    standardize=tf_lower_and_split_punct,
    max_tokens=max_vocab_size,
    ragged=True)
In [14]:
context_text_processor.adapt(train_raw.map(lambda context, target: context))

# 先頭10個の語彙を表示。
context_text_processor.get_vocabulary()[:10]
2024-03-27 22:51:26.175263: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
Out[14]:
['', '[UNK]', '[START]', '[END]', '.', 'que', 'de', 'el', 'a', 'no']
In [15]:
# ターゲット用のベクトル化レイヤー。
target_text_processor = tf.keras.layers.TextVectorization(
    standardize=tf_lower_and_split_punct,
    max_tokens=max_vocab_size,
    ragged=True)

target_text_processor.adapt(train_raw.map(lambda context, target: target))
target_text_processor.get_vocabulary()[:10]
2024-03-27 22:51:27.917573: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
Out[15]:
['', '[UNK]', '[START]', '[END]', '.', 'the', 'i', 'to', 'you', 'tom']
In [16]:
# これらの層は、文字列のバッチをトークンID配列のバッチに変換できます。
example_tokens = context_text_processor(example_context_strings)
example_tokens[:3, :]
Out[16]:
<tf.RaggedTensor [[2, 101, 1, 4, 3], [2, 14, 40, 4651, 2174, 42, 1748, 4, 3],
 [2, 9, 18, 106, 8, 1, 4, 3]]>

トークンからテキストへの逆変換

In [17]:
context_vocab = np.array(context_text_processor.get_vocabulary())
tokens = context_vocab[example_tokens[0].numpy()]
' '.join(tokens)
Out[17]:
'[START] soy [UNK] . [END]'

トークンIDとマスクの可視化

マスクは、テキスト長を揃えるために使用されます。

In [18]:
plt.subplot(1, 2, 1)
plt.pcolormesh(example_tokens.to_tensor())
plt.title('Token IDs')

plt.subplot(1, 2, 2)
plt.pcolormesh(example_tokens.to_tensor() != 0)
_ = plt.title('Mask')
No description has been provided for this image

データセットの処理

以下の process_text 関数は、文字列のデータセットをトークンIDのテンソルに変換し、0でパディングします。さらに、(context, target)のペアを、Kerasの Model.fit でのトレーニングに適した ((context, target_in), target_out) のペアに変換します。

Kerasは入力として(inputs, labels)のペアを期待しています。ここでの入力は(context, target_in)であり、ラベルはtarget_outです。target_inとtarget_outの違いは、相互に対して一つのステップずれていることです。これにより、各位置でのラベルが次のトークンになります。

In [19]:
def process_text(context: tf.Tensor, target: tf.Tensor) -> Tuple[Tuple[tf.Tensor, tf.Tensor], tf.Tensor]:
    """
    与えられたコンテキストとターゲットを、`Model.fit` で使用できる形式に変換します。

    この関数は以下の処理を行います:
    1. コンテキストをトークン化し、固定長のテンソルに変換します。
    2. ターゲットをトークン化します。
    3. ターゲットテンソルの最後のトークンを除いたものを `targ_in` とし、最初のトークンを除いたものを `targ_out` とします。
    4. `(context, targ_in)` と `targ_out` のタプルを返します。

    Args:
        context (tf.Tensor): コンテキストが格納されたテンソル。
        target (tf.Tensor): ターゲットが格納されたテンソル。
    
    Returns:
        Tuple[Tuple[tf.Tensor, tf.Tensor], tf.Tensor]: `Model.fit` で使用できる形式のタプル。
    """
    context = context_text_processor(context).to_tensor()
    target = target_text_processor(target)
    targ_in = target[:,:-1].to_tensor() # 末尾以外のトークン
    targ_out = target[:,1:].to_tensor() # 先頭以外のトークン
    return (context, targ_in), targ_out

train_ds = train_raw.map(process_text, tf.data.AUTOTUNE)
val_ds = train_raw.map(process_text, tf.data.AUTOTUNE)
In [20]:
for (ex_context_tok, ex_tar_in), ex_tar_out in train_ds.take(1):
    print(ex_context_tok[0, :10].numpy())
    print()
    print(ex_tar_in[0, :10].numpy())
    print(ex_tar_out[0, :10].numpy())
[  2   7  49   8 979   4   3   0   0   0]

[  2  13 129   7 559   4   0   0   0   0]
[ 13 129   7 559   4   3   0   0   0   0]
2024-03-27 22:51:28.260899: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence

埋め込みベクトルの次元数

In [21]:
UNITS = 256

エンコーダ

In [22]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, text_processor: tf.keras.layers.TextVectorization, units: int):
        super(Encoder, self).__init__()
        self.text_processor = text_processor
        self.vocab_size = text_processor.vocabulary_size()
        self.units = units

        # 組み込みレイヤは、トークンIDをベクトルに変換します。
        self.embedding = tf.keras.layers.Embedding(self.vocab_size, self.units,
                                                   mask_zero=True)
        
        # RNNは、ベクトルのシーケンスを処理します。
        self.rnn = tf.keras.layers.Bidirectional(
            merge_mode='sum',
            layer=tf.keras.layers.GRU(units,
                                      return_sequences=True,
                                      recurrent_initializer='glorot_uniform'))
        
    
    def call(self, x: tf.Tensor) -> tf.Tensor:
        shape_checker = ShapeChecker()
        shape_checker(x, 'batch s')

        # トークンIDをベクトルに変換します。
        x = self.embedding(x)
        shape_checker(x, 'batch s units')

        # GRUは、ベクトルのシーケンスを処理します。
        x = self.rnn(x)
        shape_checker(x, 'batch s units')

        # 新しいベクトルのシーケンスを返します。
        return x
    
    def convert_input(self, texts: tf.RaggedTensor) -> tf.Tensor:
        texts = tf.convert_to_tensor(texts)
        if len(texts.shape) == 0:
            texts = tf.convert_to_tensor(texts)[tf.newaxis]
        context = self.text_processor(texts).to_tensor()
        context = self(context)
        return context
In [23]:
# Encode the input sequence.
encoder = Encoder(context_text_processor, UNITS)
ex_context = encoder(ex_context_tok)

print(f'Context tokens, shape (batch, s): {ex_context_tok.shape}')
print(f'Encoder output, shape (batch, s, units): {ex_context.shape}')
2024-03-27 22:51:28.394423: E tensorflow/core/util/util.cc:131] oneDNN supports DT_BOOL only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.
Context tokens, shape (batch, s): (64, 19)
Encoder output, shape (batch, s, units): (64, 19, 256)

アテンション層(CrossAttention クラス)

アテンション層は、デコーダにエンコーダからの情報へのアクセスを可能にする層です。この層は、コンテキストシーケンス全体から単一のベクトルを計算し、それをデコーダの出力に追加します。

機能:

  • 加重平均の計算
    • シーケンス全体から単一のベクトルを計算する最も単純な方法は、シーケンス全体の平均を取ることです。アテンション層はこれに似ていますが、コンテキストシーケンス全体にわたる加重平均を計算します。重みはコンテキストベクトルと「クエリ」ベクトルの組み合わせから計算されます。

コンポーネント:

  • MultiHeadAttention

    • クエリ(x)とコンテキスト(context)の間のアテンションを計算します。この例では、key_dim=unitsnum_heads=1 で設定されています。
  • LayerNormalization

    • アテンションの出力と元の入力を組み合わせた後に、レイヤー正規化を行います。
  • Add

    • アテンションの出力と元の入力を加算します。

メソッド:

  • call
    • 入力 x(クエリ)と context(コンテキスト)を受け取り、アテンションを適用します。
    • mha を用いてアテンションの出力とスコアを計算します。
    • アテンションスコアは平均化され、last_attention_weights に保存されます。
    • 出力は addlayernorm を通して最終的な結果に加工されます。

このアテンション層により、デコーダはコンテキストデータの重要な部分に焦点を合わせ、より効果的な学習と予測が可能になります。

In [24]:
class CrossAttention(tf.keras.layers.Layer):
    def __init__(self, units: int, **kwargs: Any):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(key_dim=units, num_heads=1, **kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.add = tf.keras.layers.Add()

    def call(self, x: tf.Tensor, context: tf.Tensor):
        shape_checker = ShapeChecker()

        shape_checker(x, 'batch t units')
        shape_checker(context, 'batch s units')

        attn_output, attn_scores = self.mha(
            query=x,
            value=context,
            return_attention_scores=True)
        
        shape_checker(x, 'batch t units')
        shape_checker(attn_scores, 'batch heads t s')

        # 後にプロットするために注意の重みを保持します。
        attn_scores = tf.reduce_mean(attn_scores, axis=1)
        shape_checker(attn_scores, 'batch t s')
        self.last_attention_weights = attn_scores

        x = self.add([x, attn_output])
        x = self.layernorm(x)

        return x
In [25]:
attention_layer = CrossAttention(UNITS)

# 
embed = tf.keras.layers.Embedding(target_text_processor.vocabulary_size(),
                                  output_dim=UNITS, mask_zero=True)

ex_tar_embed = embed(ex_tar_in)

result = attention_layer(ex_tar_embed, ex_context)

print(f'Context sequence, shape (batch, s, units): {ex_context.shape}')
print(f'Target sequence, shape (batch, t, units) : {ex_tar_embed.shape}')
print(f'Attention result, shape (batch, t, units): {result.shape}')
print(f'Attention weights, shape (batch, t, s)   : {attention_layer.last_attention_weights.shape}')
Context sequence, shape (batch, s, units): (64, 19, 256)
Target sequence, shape (batch, t, units) : (64, 19, 256)
Attention result, shape (batch, t, units): (64, 19, 256)
Attention weights, shape (batch, t, s)   : (64, 19, 19)
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'query' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'key' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'value' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'cross_attention' (of type CrossAttention) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
In [26]:
# 確率分布なのでValueごとの確率を合計すると1になる?
attention_layer.last_attention_weights[0].numpy().sum(axis=-1)
Out[26]:
array([1.        , 1.        , 0.99999994, 1.0000001 , 1.        ,
       1.0000001 , 0.99999994, 0.99999994, 0.99999994, 0.99999994,
       0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
       0.99999994, 0.99999994, 0.99999994, 0.99999994], dtype=float32)
In [27]:
attention_weights = attention_layer.last_attention_weights
mask = (ex_context_tok != 0).numpy()

plt.subplot(1, 2, 1)
plt.pcolormesh(mask * attention_weights[:, 0, :])
plt.title('Attention weights')

plt.subplot(1, 2, 2)
plt.pcolormesh(mask)
plt.title('Mask')
Out[27]:
Text(0.5, 1.0, 'Mask')
No description has been provided for this image

デコーダ

機能:

  1. ターゲットシーケンスの処理:
    • ターゲットシーケンス内の各トークンに対して埋め込みを検索します。
  2. RNNの使用:
    • RNN(ここではGRU)を使用してターゲットシーケンスを処理し、これまでに生成した内容を追跡します。
  3. アテンション層との連携:
    • RNNの出力をアテンション層の「クエリ」として使用し、エンコーダの出力に注目します。
  4. 次のトークンの予測:
    • 出力シーケンスの各位置で次のトークンを予測します。

コンポーネント:

  • テキストプロセッサ:
    • TextVectorization レイヤを使用して、テキストをトークンIDに変換します。
  • 単語とIDの変換レイヤ:
    • StringLookup レイヤを使用して、単語をIDに、IDを単語に変換します。
  • 埋め込みレイヤ (Embedding):
    • トークンIDを密なベクトルに変換します。
  • RNNレイヤ (GRU):
    • シーケンスデータの処理に使用され、シーケンス全体の出力と最終状態を返します。
  • アテンションレイヤ (CrossAttention):
    • エンコーダの出力に対するアテンションを計算します。
  • 出力レイヤ (Dense):
    • RNNの出力を語彙サイズのlogitsに変換します。

モデルの動作:

  • トレーニング時:
    • モデルは各位置で次の単語を予測します。情報が一方向にのみ流れるようにすることが重要です。そのため、デコーダはターゲットシーケンスを処理するために単方向(非双方向)のRNNを使用します。
  • 推論時:
    • モデルは一度に一つの単語を生成し、それをモデルにフィードバックします。
In [28]:
class Decoder(tf.keras.layers.Layer):
    @classmethod
    def add_method(cls, fun):
        """
        後からメソッドを追加するためのデコレータです。

        Args:
            fun: 追加するメソッド。
        
        Returns:
            Any: 追加されたメソッド。
        """
        setattr(cls, fun.__name__, fun)
        return fun

    def __init__(self, text_processor, units):
        super(Decoder, self).__init__()
        self.text_processor = text_processor
        self.vocab_size = text_processor.vocabulary_size()
        self.word_to_id = tf.keras.layers.StringLookup(
            vocabulary=text_processor.get_vocabulary(),
            mask_token='', oov_token='[UNK]')
        self.id_to_word = tf.keras.layers.StringLookup(
            vocabulary=text_processor.get_vocabulary(),
            mask_token='', oov_token='[UNK]',
            invert=True)
        self.start_token = self.word_to_id('[START]')
        self.end_token = self.word_to_id('[END]')

        self.units = units

        # 1. The embedding layer converts token IDs to vectors
        self.embedding = tf.keras.layers.Embedding(self.vocab_size,
                                               units, mask_zero=True)

        # 2. The RNN keeps track of what's been generated so far.
        # どうやらGPUを使うと、return_state=Trueで不具合が起きるらしい。
        # 代わりに、return_sequences=Trueで返る末尾の値を使えばいいのでは?
        self.rnn = tf.keras.layers.GRU(units,
                                   return_sequences=True,
                                   return_state=False, # TrueだとGPUでエラーが起きる。
                                   recurrent_initializer='glorot_uniform')

        # 3. The RNN output will be the query for the attention layer.
        self.attention = CrossAttention(units)

        # 4. This fully connected layer produces the logits for each
        # output token.
        self.output_layer = tf.keras.layers.Dense(self.vocab_size)
In [29]:
@Decoder.add_method
def call(self,
         context, x,
         state=None,
         return_state=False):  
    shape_checker = ShapeChecker()
    shape_checker(x, 'batch t')
    shape_checker(context, 'batch s units')

    # 1. Lookup the embeddings
    x = self.embedding(x)
    shape_checker(x, 'batch t units')

    # 2. Process the target sequence.

    # GPUだとここでなぜかエラーになる。
    # x, state = self.rnn(x, initial_state=state)
    x = self.rnn(x, initial_state=state)
    # 末尾の要素を取得する。
    state = x[:, -1, :]
    
    shape_checker(x, 'batch t units')

    # 3. Use the RNN output as the query for the attention over the context.
    x = self.attention(x, context)
    self.last_attention_weights = self.attention.last_attention_weights
    shape_checker(x, 'batch t units')
    shape_checker(self.last_attention_weights, 'batch t s')

    # Step 4. Generate logit predictions for the next token.
    logits = self.output_layer(x)
    shape_checker(logits, 'batch t target_vocab_size')

    if return_state:
        return logits, state
    else:
        return logits
In [30]:
decoder = Decoder(target_text_processor, UNITS)

logits = decoder(ex_context, ex_tar_in)

print(f'encoder output shape: (batch, s, units)            : {ex_context.shape}')
print(f'input target tokens shape: (batch, t)              : {ex_tar_in.shape}')
print(f'logits shape shape: (batch, target_vocabulary_size): {logits.shape}')
encoder output shape: (batch, s, units)            : (64, 19, 256)
input target tokens shape: (batch, t)              : (64, 19)
logits shape shape: (batch, target_vocabulary_size): (64, 19, 5000)
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'cross_attention_1' (of type CrossAttention) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:357: UserWarning: `build()` was called on layer 'decoder', however the layer does not have a `build()` method implemented and it looks like it has unbuilt state. This will cause the layer to be marked as built, despite not being actually built, which may cause failures down the line. Make sure to implement a proper `build()` method.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'decoder' (of type Decoder) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
In [31]:
@Decoder.add_method
def get_initial_state(self, context):
    batch_size = tf.shape(context)[0]
    start_tokens = tf.fill([batch_size, 1], self.start_token)
    done = tf.zeros([batch_size, 1], dtype=tf.bool)
    # embedded = self.embedding(start_tokens)
    return start_tokens, done, self.rnn.get_initial_state(batch_size=batch_size)[0]
In [32]:
@Decoder.add_method
def tokens_to_text(self, tokens: tf.Tensor):
    words = self.id_to_word(tokens)
    result = tf.strings.reduce_join(words, axis=-1, separator=' ')
    result = tf.strings.regex_replace(result, '^ *\[START\] *', '')
    result = tf.strings.regex_replace(result, ' *\[END\] *$', '')
    return result
In [33]:
@Decoder.add_method
def get_next_token(self, context: tf.Tensor, next_token: tf.Tensor, done: tf.Tensor, state: tf.Tensor, temperature: float = 0.0) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    次のトークンを生成し、デコーダの状態を更新します。

    Args:
        context (tf.Tensor): デコーダへの入力コンテキスト。
        next_token (tf.Tensor): 生成する次のトークン。
        done (tf.Tensor): すでに終了トークンが生成されたかどうかを示すブールテンソル。
        state (tf.Tensor): デコーダの現在の状態。
        temperature (float): 出力のランダム性を調整するための温度パラメータ。デフォルトは0.0 (最も確信度の高いトークンを選択)。
    
    Returns:
        Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: 生成された次のトークン、終了フラグ、更新された状態。
    """
    logits, state = self(
        context, next_token,
        state = state,
        return_state = True)
    
    if temperature == 0.0:
        next_token = tf.argmax(logits, axis=-1)
    else:
        logits = logits[:, -1, :] / temperature
        next_token = tf.random.categorical(logits, num_samples=1)

    # 終了トークンの場合、終了フラグを設定します。
    done = done | (next_token == self.end_token)
    next_token = tf.where(done, tf.constant(0, dtype=tf.int64), next_token)

    return next_token, done, state
In [34]:
next_token, done, state = decoder.get_initial_state(ex_context)
tokens = []

for n in range(10):
    # 1ステップ走らせます。
    next_token, done, state = decoder.get_next_token(
        ex_context, next_token, done, state, temperature=1.0)
    tokens.append(next_token)

# 全てのトークンを連結。
tokens = tf.concat(tokens, axis=-1) # (batch, t)

# トークンをテキストに変換。
result = decoder.tokens_to_text(tokens)
result[:3].numpy()
Out[34]:
array([b'scolded checks scared disappointed society trouble aspirin agreement haircut prevented',
       b'rude traditional earth recipe awake eyebrows sofa astute storm opera',
       b'horses exercise meet june walks cases waves several engine divorce'],
      dtype=object)

モデル

In [35]:
class Translator(tf.keras.Model):
    @classmethod
    def add_method(cls, fun):
        setattr(cls, fun.__name__, fun)
        return fun

    def __init__(self, units,
                context_text_processor,
                target_text_processor):
        super().__init__()
        # Build the encoder and decoder
        encoder = Encoder(context_text_processor, units)
        decoder = Decoder(target_text_processor, units)

        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs):
        context, x = inputs
        context = self.encoder(context)
        logits = self.decoder(context, x)

        #TODO(b/250038731): remove this
        try:
            # Delete the keras mask, so keras doesn't scale the loss+accuracy. 
            del logits._keras_mask
        except AttributeError:
            pass

        return logits
In [36]:
model = Translator(UNITS, context_text_processor, target_text_processor)

logits = model((ex_context_tok, ex_tar_in))

print(f'Context tokens, shape: (batch, s, units)         : {ex_context_tok.shape}')
print(f'Target tokens, shape: (batch, t)                 : {ex_tar_in.shape}')
print(f'logits, shape: (batch, t, target_vocabulary_size): {logits.shape}')
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'cross_attention_2' (of type CrossAttention) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:357: UserWarning: `build()` was called on layer 'decoder_1', however the layer does not have a `build()` method implemented and it looks like it has unbuilt state. This will cause the layer to be marked as built, despite not being actually built, which may cause failures down the line. Make sure to implement a proper `build()` method.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'decoder_1' (of type Decoder) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
Context tokens, shape: (batch, s, units)         : (64, 19)
Target tokens, shape: (batch, t)                 : (64, 19)
logits, shape: (batch, t, target_vocabulary_size): (64, 19, 5000)
In [37]:
def masked_loss(y_true, y_pred):
    # Calculate the loss for each item in the batch.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    loss = loss_fn(y_true, y_pred)

    # Mask off the losses on padding.
    mask = tf.cast(y_true != 0, loss.dtype)
    loss *= mask

    # Return the total.
    return tf.reduce_sum(loss)/tf.reduce_sum(mask)
In [38]:
def masked_acc(y_true, y_pred):
    # Calculate the loss for each item in the batch.
    y_pred = tf.argmax(y_pred, axis=-1)
    y_pred = tf.cast(y_pred, y_true.dtype)

    match = tf.cast(y_true == y_pred, tf.float32)
    mask = tf.cast(y_true != 0, tf.float32)

    return tf.reduce_sum(match)/tf.reduce_sum(mask)
In [39]:
model.compile(optimizer='adam',
              loss=masked_loss, 
              metrics=[masked_acc, masked_loss])
In [40]:
vocab_size = 1.0 * target_text_processor.vocabulary_size()

{"expected_loss": tf.math.log(vocab_size).numpy(),
 "expected_acc": 1/vocab_size}
Out[40]:
{'expected_loss': 8.517193, 'expected_acc': 0.0002}
In [41]:
model.evaluate(val_ds, steps=20, return_dict=True)
20/20 ━━━━━━━━━━━━━━━━━━━━ 1s 21ms/step - loss: 8.5614 - masked_acc: 0.0000e+00 - masked_loss: 8.5614
Out[41]:
{'loss': 8.562967300415039,
 'masked_acc': 0.0,
 'masked_loss': 8.562967300415039}

学習

In [42]:
history = model.fit(
    train_ds.repeat(), 
    epochs=100,
    steps_per_epoch = 100,
    validation_data=val_ds,
    validation_steps = 20,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=3)])
Epoch 1/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 9s 67ms/step - loss: 5.5338 - masked_acc: 0.2604 - masked_loss: 5.5338 - val_loss: 3.7808 - val_masked_acc: 0.3964 - val_masked_loss: 3.7808
Epoch 2/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 7s 65ms/step - loss: 3.5830 - masked_acc: 0.4172 - masked_loss: 3.5830 - val_loss: 3.0688 - val_masked_acc: 0.4821 - val_masked_loss: 3.0688
Epoch 3/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 7s 67ms/step - loss: 3.0289 - masked_acc: 0.4931 - masked_loss: 3.0289 - val_loss: 2.7194 - val_masked_acc: 0.5323 - val_masked_loss: 2.7194
Epoch 4/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 7s 68ms/step - loss: 2.6766 - masked_acc: 0.5383 - masked_loss: 2.6766 - val_loss: 2.2848 - val_masked_acc: 0.5914 - val_masked_loss: 2.2848
Epoch 5/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 65ms/step - loss: 2.3603 - masked_acc: 0.5791 - masked_loss: 2.3603 - val_loss: 2.1136 - val_masked_acc: 0.6179 - val_masked_loss: 2.1136
Epoch 6/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 62ms/step - loss: 2.1397 - masked_acc: 0.6131 - masked_loss: 2.1397 - val_loss: 1.8215 - val_masked_acc: 0.6584 - val_masked_loss: 1.8215
Epoch 7/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 64ms/step - loss: 1.9505 - masked_acc: 0.6456 - masked_loss: 1.9505 - val_loss: 1.6427 - val_masked_acc: 0.6802 - val_masked_loss: 1.6427
Epoch 8/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 63ms/step - loss: 1.8172 - masked_acc: 0.6616 - masked_loss: 1.8172 - val_loss: 1.5507 - val_masked_acc: 0.6951 - val_masked_loss: 1.5507
Epoch 9/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 64ms/step - loss: 1.7437 - masked_acc: 0.6646 - masked_loss: 1.7437 - val_loss: 1.4957 - val_masked_acc: 0.6952 - val_masked_loss: 1.4957
Epoch 10/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 64ms/step - loss: 1.6462 - masked_acc: 0.6864 - masked_loss: 1.6462 - val_loss: 1.4408 - val_masked_acc: 0.7135 - val_masked_loss: 1.4408
Epoch 11/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 63ms/step - loss: 1.5560 - masked_acc: 0.6996 - masked_loss: 1.5560 - val_loss: 1.3784 - val_masked_acc: 0.7218 - val_masked_loss: 1.3784
Epoch 12/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 65ms/step - loss: 1.5237 - masked_acc: 0.7030 - masked_loss: 1.5237 - val_loss: 1.2520 - val_masked_acc: 0.7368 - val_masked_loss: 1.2520
Epoch 13/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 65ms/step - loss: 1.4774 - masked_acc: 0.7109 - masked_loss: 1.4774 - val_loss: 1.2512 - val_masked_acc: 0.7424 - val_masked_loss: 1.2512
Epoch 14/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 64ms/step - loss: 1.4515 - masked_acc: 0.7087 - masked_loss: 1.4515 - val_loss: 1.1837 - val_masked_acc: 0.7476 - val_masked_loss: 1.1837
Epoch 15/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 65ms/step - loss: 1.4526 - masked_acc: 0.7105 - masked_loss: 1.4526 - val_loss: 1.2761 - val_masked_acc: 0.7252 - val_masked_loss: 1.2761
Epoch 16/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 64ms/step - loss: 1.1982 - masked_acc: 0.7427 - masked_loss: 1.1982 - val_loss: 1.1069 - val_masked_acc: 0.7584 - val_masked_loss: 1.1069
Epoch 17/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 64ms/step - loss: 1.1585 - masked_acc: 0.7490 - masked_loss: 1.1585 - val_loss: 1.1370 - val_masked_acc: 0.7585 - val_masked_loss: 1.1370
Epoch 18/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 64ms/step - loss: 1.2142 - masked_acc: 0.7437 - masked_loss: 1.2142 - val_loss: 1.0566 - val_masked_acc: 0.7705 - val_masked_loss: 1.0566
Epoch 19/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 65ms/step - loss: 1.1485 - masked_acc: 0.7511 - masked_loss: 1.1485 - val_loss: 1.0418 - val_masked_acc: 0.7672 - val_masked_loss: 1.0418
Epoch 20/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 63ms/step - loss: 1.1483 - masked_acc: 0.7548 - masked_loss: 1.1483 - val_loss: 1.0190 - val_masked_acc: 0.7745 - val_masked_loss: 1.0190
Epoch 21/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 7s 66ms/step - loss: 1.1644 - masked_acc: 0.7515 - masked_loss: 1.1644 - val_loss: 1.0254 - val_masked_acc: 0.7739 - val_masked_loss: 1.0254
Epoch 22/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 7s 65ms/step - loss: 1.1385 - masked_acc: 0.7539 - masked_loss: 1.1385 - val_loss: 1.0227 - val_masked_acc: 0.7746 - val_masked_loss: 1.0227
Epoch 23/100
100/100 ━━━━━━━━━━━━━━━━━━━━ 7s 66ms/step - loss: 1.1077 - masked_acc: 0.7619 - masked_loss: 1.1077 - val_loss: 1.0207 - val_masked_acc: 0.7765 - val_masked_loss: 1.0207
In [43]:
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.ylim([0, max(plt.ylim())])
plt.xlabel('Epoch #')
plt.ylabel('CE/token')
plt.legend()
Out[43]:
<matplotlib.legend.Legend at 0x7f2811076ec0>
No description has been provided for this image
In [44]:
plt.plot(history.history['masked_acc'], label='accuracy')
plt.plot(history.history['val_masked_acc'], label='val_accuracy')
plt.ylim([0, max(plt.ylim())])
plt.xlabel('Epoch #')
plt.ylabel('CE/token')
plt.legend()
Out[44]:
<matplotlib.legend.Legend at 0x7f27846d49a0>
No description has been provided for this image
In [45]:
@Translator.add_method
def translate(self,
                texts, *,
                max_length=50,
                temperature=0.0):
    # Process the input texts
    context = self.encoder.convert_input(texts)
    batch_size = tf.shape(texts)[0]

    # Setup the loop inputs
    tokens = []
    attention_weights = []
    next_token, done, state = self.decoder.get_initial_state(context)

    for _ in range(max_length):
        # Generate the next token
        next_token, done, state = self.decoder.get_next_token(
            context, next_token, done,  state, temperature)

        # Collect the generated tokens
        tokens.append(next_token)
        attention_weights.append(self.decoder.last_attention_weights)

        if tf.executing_eagerly() and tf.reduce_all(done):
            break

    # Stack the lists of tokens and attention weights.
    tokens = tf.concat(tokens, axis=-1)   # t*[(batch 1)] -> (batch, t)
    self.last_attention_weights = tf.concat(attention_weights, axis=1)  # t*[(batch 1 s)] -> (batch, t s)

    result = self.decoder.tokens_to_text(tokens)
    return result
In [46]:
result = model.translate(['¿Todavía está en casa?']) # Are you still home
result[0].numpy().decode()
Out[46]:
'is still home ? '
In [47]:
@Translator.add_method
def plot_attention(self, text, **kwargs):
    assert isinstance(text, str)
    output = self.translate([text], **kwargs)
    output = output[0].numpy().decode()

    attention = self.last_attention_weights[0]

    context = tf_lower_and_split_punct(text)
    context = context.numpy().decode().split()

    output = tf_lower_and_split_punct(output)
    output = output.numpy().decode().split()[1:]

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1)

    ax.matshow(attention, cmap='viridis', vmin=0.0)

    fontdict = {'fontsize': 14}

    ax.set_xticklabels([''] + context, fontdict=fontdict, rotation=90)
    ax.set_yticklabels([''] + output, fontdict=fontdict)

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    ax.set_xlabel('Input text')
    ax.set_ylabel('Output text')
In [48]:
model.plot_attention('¿Todavía está en casa?') # Are you still home
/tmp/ipykernel_66485/3440379237.py:22: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([''] + context, fontdict=fontdict, rotation=90)
/tmp/ipykernel_66485/3440379237.py:23: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_yticklabels([''] + output, fontdict=fontdict)
No description has been provided for this image
In [49]:
long_text = context_raw[-1]

import textwrap
print('Expected output:\n', '\n'.join(textwrap.wrap(target_raw[-1])))
Expected output:
 If you want to sound like a native speaker, you must be willing to
practice saying the same sentence over and over in the same way that
banjo players practice the same phrase over and over until they can
play it correctly and at the desired tempo.
In [50]:
model.plot_attention(long_text)
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'query' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'key' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'value' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'cross_attention_2' (of type CrossAttention) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'decoder_1' (of type Decoder) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/tmp/ipykernel_66485/3440379237.py:22: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([''] + context, fontdict=fontdict, rotation=90)
/tmp/ipykernel_66485/3440379237.py:23: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_yticklabels([''] + output, fontdict=fontdict)
No description has been provided for this image
In [51]:
inputs = [
    'Hace mucho frio aqui.', # "It's really cold here."
    'Esta es mi vida.', # "This is my life."
    'Su cuarto es un desastre.' # "His room is a mess"
]

複数の文章を翻訳する場合、まとめて翻訳した方が速い

In [52]:
%%time
for t in inputs:
  print(model.translate([t])[0].numpy().decode())

print()
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'query' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'key' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'value' (of type EinsumDense) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'cross_attention_2' (of type CrossAttention) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
/home/sonoda/GitHub/investment-study/.tf/lib/python3.10/site-packages/keras/src/layers/layer.py:845: UserWarning: Layer 'decoder_1' (of type Decoder) was passed an input with a mask attached to it. However, this layer does not support masking and will therefore destroy the mask information. Downstream layers will not see the mask.
  warnings.warn(
its very cold here . 
this is my life . 
his room is a mess . 

CPU times: user 271 ms, sys: 1.46 ms, total: 272 ms
Wall time: 257 ms
In [53]:
%%time
result = model.translate(inputs)

print(result[0].numpy().decode())
print(result[1].numpy().decode())
print(result[2].numpy().decode())
print()
its very cold here .  
this is my life .  
his room is a mess . 

CPU times: user 133 ms, sys: 505 µs, total: 134 ms
Wall time: 113 ms

エクスポート可能にする

In [54]:
class Export(tf.Module):
    def __init__(self, model):
        self.model = model
    
    @tf.function(input_signature=[tf.TensorSpec(dtype=tf.string, shape=[None])])
    def translate(self, inputs):
        return self.model.translate(inputs)
In [55]:
export = Export(model)
In [56]:
%%time
_ = export.translate(tf.constant(inputs))
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
W0000 00:00:1711547647.699238   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.699648   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.700043   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.700374   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.700707   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.701033   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.701427   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.701766   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.702101   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.702460   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.702967   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.703342   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.703690   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.704030   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.704375   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.704715   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.705067   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.705417   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.705775   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.706125   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.706478   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.706831   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.707180   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.707539   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.707896   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.708278   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.708620   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.709804   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.710208   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.710586   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.710911   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.711232   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.711563   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.711890   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.712212   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.712534   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.712854   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.713183   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.713508   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.713835   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.714159   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.714481   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.714803   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.715136   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.715469   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.715793   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.716114   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.716455   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.716779   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1711547647.717108   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
CPU times: user 4.92 s, sys: 188 ms, total: 5.11 s
Wall time: 4.97 s
In [57]:
%%time
result = export.translate(tf.constant(inputs))

print(result[0].numpy().decode())
print(result[1].numpy().decode())
print(result[2].numpy().decode())
print()
its very cold here .                                             
this is my life .                                             
his room is a mess .                                            

CPU times: user 84.1 ms, sys: 123 ms, total: 207 ms
Wall time: 57.6 ms

より効率的な実装

In [58]:
@Translator.add_method
def translate(self, texts, *, max_length=500, temperature=tf.constant(0.0)):
    shape_checker = ShapeChecker()
    context = self.encoder.convert_input(texts)
    batch_size = tf.shape(context)[0]
    shape_checker(context, 'batch s units')

    next_token, done, state = self.decoder.get_initial_state(context)

    # initialize the accumulator
    tokens = tf.TensorArray(tf.int64, size=1, dynamic_size=True)

    for t in tf.range(max_length):
        # Generate the next token
        next_token, done, state = self.decoder.get_next_token(
            context, next_token, done, state, temperature)
        shape_checker(next_token, 'batch t1')

        # Collect the generated tokens
        tokens = tokens.write(t, next_token)

        # if all the sequences are done, break
        if tf.reduce_all(done):
            break
    
    # Convert the list of generated token ids to a list of strings.
    tokens = tokens.stack()
    shape_checker(tokens, 't batch t1')
    tokens = einops.rearrange(tokens, 't batch 1 -> batch t')
    shape_checker(tokens, 'batch t')

    text = self.decoder.tokens_to_text(tokens)
    shape_checker(text, 'batch')

    return text
In [59]:
%%time
result = model.translate(inputs)

print(result[0].numpy().decode())
print(result[1].numpy().decode())
print(result[2].numpy().decode())
print()
its very cold here .  
this is my life .  
his room is a mess . 

CPU times: user 132 ms, sys: 5.54 ms, total: 138 ms
Wall time: 117 ms

tf.Functionを使ったときに速度に差が出る

In [60]:
# このクラスの実装は前のものと同じ。
class Export(tf.Module):
    def __init__(self, model):
        self.model = model
    
    @tf.function(input_signature=[tf.TensorSpec(dtype=tf.string, shape=[None])])
    def translate(self, inputs):
        return self.model.translate(inputs)
In [61]:
export = Export(model)
In [62]:
%%time
_ = export.translate(inputs)
CPU times: user 795 ms, sys: 76.7 ms, total: 872 ms
Wall time: 841 ms
W0000 00:00:1711547649.698592   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
In [63]:
%%time
result = export.translate(tf.constant(inputs))

print(result[0].numpy().decode())
print(result[1].numpy().decode())
print(result[2].numpy().decode())
print()
its very cold here .  
this is my life .  
his room is a mess . 

CPU times: user 0 ns, sys: 43.3 ms, total: 43.3 ms
Wall time: 12 ms

モデルの保存

In [64]:
%%time
tf.saved_model.save(export, 'dynamic_translator',
                    signatures={'serving_default': export.translate})
INFO:tensorflow:Assets written to: dynamic_translator/assets
INFO:tensorflow:Assets written to: dynamic_translator/assets
CPU times: user 635 ms, sys: 40.8 ms, total: 675 ms
Wall time: 675 ms

モデルのロード

In [65]:
%%time
reloaded = tf.saved_model.load('dynamic_translator')
_ = reloaded.translate(tf.constant(inputs)) # warmup
CPU times: user 413 ms, sys: 12.3 ms, total: 425 ms
Wall time: 394 ms
W0000 00:00:1711547650.801973   66485 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "103" frequency: 3417 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 49152 l2_cache_size: 2097152 l3_cache_size: 34603008 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
In [66]:
%%time
result = reloaded.translate(tf.constant(inputs))

print(result[0].numpy().decode())
print(result[1].numpy().decode())
print(result[2].numpy().decode())
print()
its very cold here .  
this is my life .  
his room is a mess . 

CPU times: user 0 ns, sys: 46 ms, total: 46 ms
Wall time: 13 ms
In [ ]: