TensorFlowとPyTorchの比較 - らんだむな記憶

import torch
import numpy as np
import tensorflow as tf

をしておいて比較してみる。

def softmax(x):
    """Return the row-wise softmax of a 2-D tensor.

    Each row is shifted by its maximum before exponentiation. The shift
    cancels in the ratio, so the result is mathematically unchanged, but
    ``exp`` can no longer overflow to ``inf`` (which turned the result into
    ``nan``) for large inputs.

    Args:
        x: Tensor of shape (rows, cols); the softmax is taken along dim=1.

    Returns:
        Tensor with the same shape as ``x`` whose rows each sum to 1.
    """
    if x.numel() == 0:
        # Nothing to normalise; max() over an empty dimension would raise.
        return torch.exp(x)
    row_max = torch.max(x, dim=1).values.view(-1, 1)
    exp_x = torch.exp(x - row_max)  # computed once and reused below
    return exp_x / exp_x.sum(dim=1).view(-1, 1)

# Use floating-point values rather than integers: integer input triggers the
# "exp_vml_cpu not implemented for 'Long'" error.
data = np.array([[1, 2], [3, 1]], dtype=np.float64)
x = torch.tensor(data)

# Element-wise exponentials and their per-row sums, named once and reused.
exp_x = torch.exp(x)
row_sums = exp_x.sum(dim=1)
builtin_softmax = torch.nn.Softmax(dim=1)

print(type(x))
print(exp_x)
print(exp_x.sum())
print(exp_x.sum(dim=0))
print(row_sums)
print(row_sums.view(-1, 1))
print(softmax(x))
print(builtin_softmax(x))
print(softmax(x).sum(dim=1))

<class 'torch.Tensor'>
tensor([[ 2.7183,  7.3891],
        [20.0855,  2.7183]], dtype=torch.float64)
tensor(32.9112, dtype=torch.float64)
tensor([22.8038, 10.1073], dtype=torch.float64)
tensor([10.1073, 22.8038], dtype=torch.float64)
tensor([[10.1073],
        [22.8038]], dtype=torch.float64)
tensor([[0.2689, 0.7311],
        [0.8808, 0.1192]], dtype=torch.float64)
tensor([[0.2689, 0.7311],
        [0.8808, 0.1192]], dtype=torch.float64)
tensor([1., 1.], dtype=torch.float64)

と

def softmax(x):
    """Return the row-wise softmax of a 2-D tensor.

    Each row is shifted by its maximum before exponentiation. The shift
    cancels in the ratio, so the result is mathematically unchanged, but
    ``exp`` can no longer overflow to ``inf`` (which turned the result into
    ``nan``) for large inputs.

    Args:
        x: Tensor of shape (rows, cols); the softmax is taken along axis=1.

    Returns:
        Tensor with the same shape as ``x`` whose rows each sum to 1.
    """
    row_max = tf.reshape(tf.reduce_max(x, axis=1), (-1, 1))
    exp_x = tf.exp(x - row_max)  # computed once and reused below
    return exp_x / tf.reshape(tf.reduce_sum(exp_x, axis=1), (-1, 1))

# 2x2 float64 input wrapped as a TensorFlow constant.
data = np.array([[1, 2], [3, 1]], dtype=np.float64)
x = tf.constant(data)

# Element-wise exponentials and their per-row sums, named once and reused.
exp_x = tf.exp(x)
row_sums = tf.reduce_sum(exp_x, axis=1)

print(type(x))
print(exp_x)
print(tf.reduce_sum(exp_x))
print(tf.reduce_sum(exp_x, axis=0))
print(row_sums)
print(tf.reshape(row_sums, (-1, 1)))
print(softmax(x))
print(tf.nn.softmax(x))
print(tf.reduce_sum(softmax(x), axis=1))

<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(
[[ 2.71828183  7.3890561 ]
 [20.08553692  2.71828183]], shape=(2, 2), dtype=float64)
tf.Tensor(32.91115667903641, shape=(), dtype=float64)
tf.Tensor([22.80381875 10.10733793], shape=(2,), dtype=float64)
tf.Tensor([10.10733793 22.80381875], shape=(2,), dtype=float64)
tf.Tensor(
[[10.10733793]
 [22.80381875]], shape=(2, 1), dtype=float64)
tf.Tensor(
[[0.26894142 0.73105858]
 [0.88079708 0.11920292]], shape=(2, 2), dtype=float64)
tf.Tensor(
[[0.26894142 0.73105858]
 [0.88079708 0.11920292]], shape=(2, 2), dtype=float64)
tf.Tensor([1. 1.], shape=(2,), dtype=float64)

意外とreduce_sumと書くことに違和感を覚えなくもない・・・が、次元が削減されるという意味合いを関数名に含めているのだろう。tf.Tensor.sumはないんだな。
PyTorchのreshapeとviewの違いもなかなかピンと来ないなぁ: What's the difference between reshape and view in pytorch? - Stack Overflow

dimとかaxisについては「tf.reduce_meanの使い方と意味 - Qiita」の考え方で見ると良さそう。