らんだむな記憶

Let's try out this whole blogging thing! ...or something like that.

StudentAdmissions

Would something like the following do if I rewrite it with TensorFlow + Keras? ...Hmm...
I'll think it over again some other time...

from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

def main():
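    # Eager execution must be enabled explicitly in TensorFlow 1.x (it is the default in 2.x)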
    tf.enable_eager_execution()

    # Load the data
    data = pd.read_csv('student_data.csv')
    # One-hot encode the rank column
    one_hot_data = pd.concat([data, pd.get_dummies(data['rank'], prefix='rank')], axis=1)
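    # Drop the original rank column now that the dummy columns carry the same information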
    one_hot_data = one_hot_data.drop('rank', axis=1)

    # Normalize each feature into [0, 1] (GPA is on a 4.0 scale, GRE on an 800-point scale)
    processed_data = one_hot_data[:]
    processed_data["gpa"] /= 4
    processed_data["gre"] /= 800

    # Split the data into training and test sets at a 9:1 ratio
    sample = np.random.choice(processed_data.index, size=int(len(processed_data)*0.9), replace=False)
    train_data, test_data = processed_data.iloc[sample], processed_data.drop(sample)

    features = train_data.drop('admit', axis=1).values
    targets = train_data['admit'].values
    features_test = test_data.drop('admit', axis=1).values
    targets_test = test_data['admit'].values

    n_records, n_features = features.shape

    # Build the network: a single dense layer with a sigmoid output
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(1, activation=tf.nn.sigmoid, input_shape=(n_features,))
    ])

    # Binary classification: binary cross-entropy loss on the sigmoid output
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    epochs = 1000
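    # fit() returns a History object; its .history dict records the per-epoch loss/metric values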
    history = model.fit(x=features, y=targets, epochs=epochs)

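    # evaluate() returns the loss followed by the compiled metrics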
    test_loss, test_accuracy = model.evaluate(x=features_test, y=targets_test)
    print('Accuracy on test dataset:', test_accuracy)

if __name__ == "__main__":
    main()
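
As an aside: matplotlib is imported above but never used, and fit() returns a History object, so the training curve could be plotted with a small helper along the lines of the sketch below. plot_history is just an illustrative name, and the accuracy key differs between TF versions ('acc' in 1.x, 'accuracy' in 2.x), so the sketch checks for both; it would be called as plot_history(history) right after model.fit.

def plot_history(history):
    # history.history maps metric names to lists of per-epoch values
    acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'  # key name depends on the TF version
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history[acc_key], label='accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.show()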