数据挖掘与分析——深度学习算法应用
1. TensorFlow框架的基本使用(5-1)
- 获取训练数据
构建一个简单的线性模型:W,b为参数,W=2,b=1,运用tf.random.normal() 产生1000个随机数,产生x,y数据。
用matplotlib库,用蓝色绘制训练数据。
- 定义模型
通过样本数据的散点图可以判断,数据呈线性规律变化,因此可以建立一个线性模型,即 $y = Wx + b$,把该线性模型定义为一个简单的类,里面封装了变量和计算,变量设置用tf.Variable()。
# 步骤2:定义模型
class LinearModel(tf.Module):
    """Single-feature linear model ``y = W * x + b`` with trainable ``W`` and ``b``."""

    def __init__(self):
        # Let tf.Module set up its own state (name / name scope) before
        # any variables are attached to the instance.
        super().__init__()
        # Both parameters are scalars drawn from a normal distribution
        # with standard deviation 0.1.
        self.W = tf.Variable(tf.random.normal(shape=(), stddev=0.1))
        self.b = tf.Variable(tf.random.normal(shape=(), stddev=0.1))

    def __call__(self, x):
        """Return the prediction ``W * x + b`` for input ``x``."""
        return self.W * x + self.b
- 定义损失函数
损失函数是衡量给定输入的模型输出与期望输出的匹配程度,采用均方误差(L2范数损失函数)。
# 步骤3:定义损失函数
def loss(y_true, y_pred):
    """Return the mean squared error between targets and predictions."""
    residual = y_true - y_pred
    squared_residual = tf.square(residual)
    return tf.reduce_mean(squared_residual)
- 模型训练
运用数据和模型来训练得到模型的变量(W和b),观察W和b的变化(使用matplotlib绘制W和b的变化情况曲线)。
# Fit W and b with plain gradient descent and record both after every epoch.
model = LinearModel()
learning_rate = 0.1
epochs = 50
history_W = []
history_b = []
for epoch in range(epochs):
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        predictions = model(inputs)
        current_loss = loss(outputs, predictions)
    dW, db = tape.gradient(current_loss, [model.W, model.b])
    # Step each parameter against its gradient, then log the new value.
    for variable, gradient, trace in (
        (model.W, dW, history_W),
        (model.b, db, history_b),
    ):
        variable.assign_sub(learning_rate * gradient)
        trace.append(variable.numpy())
可视化
# Plot how W and b change over the training epochs.
for series, series_label in ((history_W, 'W'), (history_b, 'b')):
    plt.plot(series, label=series_label)
plt.xlabel('Epochs')
plt.ylabel('Values')
plt.legend()
plt.show()
完整代码:
import tensorflow as tf
import matplotlib.pyplot as plt
# Step 1: generate synthetic data y = true_W * x + true_b + noise.
num_samples = 1000
true_W = 2
true_b = 1
sample_shape = (num_samples,)
inputs = tf.random.normal(shape=sample_shape)
noise = tf.random.normal(shape=sample_shape)
outputs = inputs * true_W + true_b + noise

# Scatter-plot the training data in blue.
plt.scatter(inputs, outputs, c='b', label='Training data')
plt.xlabel('Input')
plt.ylabel('Output')
plt.legend()
plt.show()
# 步骤2:定义模型
class LinearModel(tf.Module):
    """Single-feature linear model ``y = W * x + b`` with trainable ``W`` and ``b``."""

    def __init__(self):
        # Let tf.Module set up its own state (name / name scope) before
        # any variables are attached to the instance.
        super().__init__()
        # Both parameters are scalars drawn from a normal distribution
        # with standard deviation 0.1.
        self.W = tf.Variable(tf.random.normal(shape=(), stddev=0.1))
        self.b = tf.Variable(tf.random.normal(shape=(), stddev=0.1))

    def __call__(self, x):
        """Return the prediction ``W * x + b`` for input ``x``."""
        return self.W * x + self.b
# 步骤3:定义损失函数
def loss(y_true, y_pred):
    """Return the mean squared error between targets and predictions."""
    residual = y_true - y_pred
    squared_residual = tf.square(residual)
    return tf.reduce_mean(squared_residual)
# Step 4: fit W and b with plain gradient descent, logging both each epoch.
model = LinearModel()
learning_rate = 0.1
epochs = 50
history_W = []
history_b = []
for epoch in range(epochs):
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        predictions = model(inputs)
        current_loss = loss(outputs, predictions)
    dW, db = tape.gradient(current_loss, [model.W, model.b])
    # Step each parameter against its gradient, then log the new value.
    for variable, gradient, trace in (
        (model.W, dW, history_W),
        (model.b, db, history_b),
    ):
        variable.assign_sub(learning_rate * gradient)
        trace.append(variable.numpy())
# Plot how W and b change over the training epochs.
for series, series_label in ((history_W, 'W'), (history_b, 'b')):
    plt.plot(series, label=series_label)
plt.xlabel('Epochs')
plt.ylabel('Values')
plt.legend()
plt.show()
2. 多层神经网络分类(5-2)
- 数据获取与预处理
MNIST 数据集来自美国国家标准与技术研究所, National Institute of Standards and Technology (NIST). 训练集 (training set) 由来自 250 个不同人手写的数字构成, 其中 50% 是高中学生, 50% 来自人口普查局 (the Census Bureau) 的工作人员. 测试集(test set) 也是同样比例的手写数字数据。
每张图像的大小都是28x28像素。MNIST数据集有60000张图像用于训练和10000张图像用于测试,其中每张图像都被标记了对应的数字(0-9)。
- 加载数据集
- 查看数据集
- 归一化处理
# Load the MNIST data set.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Show the first training image. The title must come from y_train (the
# label), not x_train (the pixel array that is being displayed).
plt.imshow(x_train[0], cmap='gray')
plt.title(f"Label: {y_train[0]}")
plt.axis('off')
plt.show()

# Show the first test image together with its label from y_test.
plt.imshow(x_test[0], cmap='gray')
plt.title(f"Label: {y_test[0]}")
plt.axis('off')
plt.show()

# Divide the pixel values by 255.0 to normalise the inputs.
x_train = x_train / 255.0
x_test = x_test / 255.0
- 模型构建
- 模型定义
- 编译模型
- 输出模型参数
# Helper that displays a single image.
def plot_images(images):
    """Draw ``images`` with the 'binary' colormap and show the figure."""
    colormap = 'binary'
    plt.imshow(images, cmap=colormap)
    plt.show()
# Build the classifier: flatten -> 256 -> dropout -> 128 -> 64 -> 10-way softmax.
layer_stack = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),    # 28x28 image -> 1-D vector
    tf.keras.layers.Dense(256, activation='relu'),    # fully connected, ReLU
    tf.keras.layers.Dropout(0.2),                     # dropout against overfitting
    tf.keras.layers.Dense(128, activation='relu'),    # fully connected, ReLU
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),  # class probabilities
]
model = tf.keras.models.Sequential(layer_stack)

# Compile with Adam and sparse categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

# Print the model structure.
model.summary()
- 模型训练
- 训练
- 获取训练历史数据中的各指标值
- 绘制指标在训练过程中的变化图
# Train the model, holding out 20% of the training data for validation.
history = model.fit(x_train, y_train, epochs=50, validation_split=0.2, verbose=1)
train_loss = history.history['loss']
val_loss = history.history['val_loss']
# The model is compiled with metrics=['sparse_categorical_accuracy'], so the
# history is keyed by that name; 'accuracy' would raise a KeyError.
train_accuracy = history.history['sparse_categorical_accuracy']
val_accuracy = history.history['val_sparse_categorical_accuracy']
- 模型评估
使用测试集对模型进行评估
# Draw the loss and accuracy curves side by side.
plt.figure(figsize=(12, 4))
panels = (
    ('Loss', train_loss, val_loss),
    ('Accuracy', train_accuracy, val_accuracy),
)
for position, (metric, train_values, val_values) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(train_values, label=f'Training {metric}')
    plt.plot(val_values, label=f'Validation {metric}')
    plt.title(f'Training and Validation {metric}')
    plt.xlabel('Epochs')
    plt.ylabel(metric)
    plt.legend()
plt.show()

# Evaluate on the test set and report both numbers.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")
完整代码:
import tensorflow as tf
# Load the MNIST data set.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split
import matplotlib.pyplot as plt

# Show the first image of the training set, then of the test set, each
# titled with its label.
for preview_images, preview_labels in (
    (train_images, train_labels),
    (test_images, test_labels),
):
    plt.imshow(preview_images[0], cmap='gray')
    plt.title(f"Label: {preview_labels[0]}")
    plt.axis('off')
    plt.show()

# Scale the pixel values into the range 0..1.
train_images = train_images.astype('float32') / 255
test_images = test_images.astype('float32') / 255
from tensorflow.keras import models, layers
# Define the model: flatten -> 512-unit ReLU layer -> 10-way softmax.
layer_stack = [
    layers.Flatten(input_shape=(28, 28)),    # 28x28 image -> 784-d vector
    layers.Dense(512, activation='relu'),    # fully connected, ReLU
    layers.Dense(10, activation='softmax'),  # probability per digit
]
model = models.Sequential(layer_stack)
model.compile(
    optimizer='adam',                        # Adam optimiser
    loss='sparse_categorical_crossentropy',  # sparse cross-entropy loss
    metrics=['accuracy'],                    # track accuracy
)
model.summary()

# Train, then pull the per-epoch curves out of the history record.
history = model.fit(
    train_images,
    train_labels,
    epochs=5,
    batch_size=128,
    validation_split=0.2,
)
epoch_log = history.history
train_loss, val_loss = epoch_log['loss'], epoch_log['val_loss']
train_accuracy, val_accuracy = epoch_log['accuracy'], epoch_log['val_accuracy']
# Draw the loss and accuracy curves side by side.
plt.figure(figsize=(12, 4))
panels = (
    ('Loss', train_loss, val_loss),
    ('Accuracy', train_accuracy, val_accuracy),
)
for position, (metric, train_values, val_values) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(train_values, label=f'Training {metric}')
    plt.plot(val_values, label=f'Validation {metric}')
    plt.title(f'Training and Validation {metric}')
    plt.xlabel('Epochs')
    plt.ylabel(metric)
    plt.legend()
plt.show()

# Evaluate on the test set and report both numbers.
test_loss, test_accuracy = model.evaluate(test_images, test_labels)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")
3. 多层神经网络回归(5-3)
- 数据获取与预处理
Auto MPG 数据集,它记录了各种汽车效能指标MPG(Mile Per Gallon)与气缸数、重量、马力等因素的真实数据。除了产地的数字字段表示类别外,其他字段都是数值类型。对于产地字段,1 表示美国,2 表示欧洲,3 表示日本。
数据集样例:
18.0 8 307.0 130.0 3504. 12.0 70 1 "chevrolet chevelle malibu" 15.0 8 350.0 165.0 3693. 11.5 70 1 "buick skylark 320" 18.0 8 318.0 150.0 3436. 11.0 70 1 "plymouth satellite" 16.0 8 304.0 150.0 3433. 12.0 70 1 "amc rebel sst" 17.0 8 302.0 140.0 3449. 10.5 70 1 "ford torino" 15.0 8 429.0 198.0 4341. 10.0 70 1 "ford galaxie 500" 14.0 8 454.0 220.0 4354. 9.0 70 1 "chevrolet impala" 14.0 8 440.0 215.0 4312. 8.5 70 1 "plymouth fury iii" 14.0 8 455.0 225.0 4425. 10.0 70 1 "pontiac catalina" 15.0 8 390.0 190.0 3850. 8.5 70 1 "amc ambassador dpl" 15.0 8 383.0 170.0 3563. 10.0 70 1 "dodge challenger se" 14.0 8 340.0 160.0 3609. 8.0 70 1 "plymouth 'cuda 340" 15.0 8 400.0 150.0 3761. 9.5 70 1 "chevrolet monte carlo" 14.0 8 455.0 225.0 3086. 10.0 70 1 "buick estate wagon (sw)" 24.0 4 113.0 95.00 2372. 15.0 70 3 "toyota corona mark ii" 22.0 6 198.0 95.00 2833. 15.5 70 1 "plymouth duster" 18.0 6 199.0 97.00 2774. 15.5 70 1 "amc hornet" 21.0 6 200.0 85.00 2587. 16.0 70 1 "ford maverick" 27.0 4 97.00 88.00 2130. 14.5 70 3 "datsun pl510" 26.0 4 97.00 46.00 1835. 20.5 70 2 "volkswagen 1131 deluxe sedan" 25.0 4 110.0 87.00 2672. 17.5 70 2 "peugeot 504" 24.0 4 107.0 90.00 2430. 14.5 70 2 "audi 100 ls" 25.0 4 104.0 95.00 2375. 17.5 70 2 "saab 99e" 26.0 4 121.0 113.0 2234. 12.5 70 2 "bmw 2002" 21.0 6 199.0 90.00 2648. 15.0 70 1 "amc gremlin" 10.0 8 360.0 215.0 4615. 14.0 70 1 "ford f250" 10.0 8 307.0 200.0 4376. 15.0 70 1 "chevy c20" 11.0 8 318.0 210.0 4382. 13.5 70 1 "dodge d200" 9.0 8 304.0 193.0 4732. 18.5 70 1 "hi 1200d" 27.0 4 97.00 88.00 2130. 14.5 71 3 "datsun pl510" 28.0 4 140.0 90.00 2264. 15.5 71 1 "chevrolet vega 2300" 25.0 4 113.0 95.00 2228. 14.0 71 3 "toyota corona" 25.0 4 98.00 ? 2046. 19.0 71 1 "ford pinto" 19.0 6 232.0 100.0 2634. 13.0 71 1 "amc gremlin" 16.0 6 225.0 105.0 3439. 15.5 71 1 "plymouth satellite custom" 17.0 6 250.0 100.0 3329. 15.5 71 1 "chevrolet chevelle malibu" 19.0 6 250.0 88.00 3302. 15.5 71 1 "ford torino 500" 18.0 6 232.0 100.0 3288. 
15.5 71 1 "amc matador" 14.0 8 350.0 165.0 4209. 12.0 71 1 "chevrolet impala" 14.0 8 400.0 175.0 4464. 11.5 71 1 "pontiac catalina brougham" 14.0 8 351.0 153.0 4154. 13.5 71 1 "ford galaxie 500" 14.0 8 318.0 150.0 4096. 13.0 71 1 "plymouth fury iii" 12.0 8 383.0 180.0 4955. 11.5 71 1 "dodge monaco (sw)" 13.0 8 400.0 170.0 4746. 12.0 71 1 "ford country squire (sw)" 13.0 8 400.0 175.0 5140. 12.0 71 1 "pontiac safari (sw)" 18.0 6 258.0 110.0 2962. 13.5 71 1 "amc hornet sportabout (sw)" 22.0 4 140.0 72.00 2408. 19.0 71 1 "chevrolet vega (sw)" 19.0 6 250.0 100.0 3282. 15.0 71 1 "pontiac firebird" 18.0 6 250.0 88.00 3139. 14.5 71 1 "ford mustang" 23.0 4 122.0 86.00 2220. 14.0 71 1 "mercury capri 2000" 28.0 4 116.0 90.00 2123. 14.0 71 2 "opel 1900" 30.0 4 79.00 70.00 2074. 19.5 71 2 "peugeot 304" 30.0 4 88.00 76.00 2065. 14.5 71 2 "fiat 124b" 31.0 4 71.00 65.00 1773. 19.0 71 3 "toyota corolla 1200" 35.0 4 72.00 69.00 1613. 18.0 71 3 "datsun 1200" 27.0 4 97.00 60.00 1834. 19.0 71 2 "volkswagen model 111" 26.0 4 91.00 70.00 1955. 20.5 71 1 "plymouth cricket" 24.0 4 113.0 95.00 2278. 15.5 72 3 "toyota corona hardtop" 25.0 4 97.50 80.00 2126. 17.0 72 1 "dodge colt hardtop" 23.0 4 97.00 54.00 2254. 23.5 72 2 "volkswagen type 3" 20.0 4 140.0 90.00 2408. 19.5 72 1 "chevrolet vega" 21.0 4 122.0 86.00 2226. 16.5 72 1 "ford pinto runabout" 13.0 8 350.0 165.0 4274. 12.0 72 1 "chevrolet impala" 14.0 8 400.0 175.0 4385. 12.0 72 1 "pontiac catalina" 15.0 8 318.0 150.0 4135. 13.5 72 1 "plymouth fury iii" 14.0 8 351.0 153.0 4129. 13.0 72 1 "ford galaxie 500" 17.0 8 304.0 150.0 3672. 11.5 72 1 "amc ambassador sst" 11.0 8 429.0 208.0 4633. 11.0 72 1 "mercury marquis" 13.0 8 350.0 155.0 4502. 13.5 72 1 "buick lesabre custom" 12.0 8 350.0 160.0 4456. 13.5 72 1 "oldsmobile delta 88 royale" 13.0 8 400.0 190.0 4422. 12.5 72 1 "chrysler newport royal" 19.0 3 70.00 97.00 2330. 13.5 72 3 "mazda rx2 coupe" 15.0 8 304.0 150.0 3892. 12.5 72 1 "amc matador (sw)" 13.0 8 307.0 130.0 4098. 
14.0 72 1 "chevrolet chevelle concours (sw)" 13.0 8 302.0 140.0 4294. 16.0 72 1 "ford gran torino (sw)" 14.0 8 318.0 150.0 4077. 14.0 72 1 "plymouth satellite custom (sw)" 18.0 4 121.0 112.0 2933. 14.5 72 2 "volvo 145e (sw)" 22.0 4 121.0 76.00 2511. 18.0 72 2 "volkswagen 411 (sw)" 21.0 4 120.0 87.00 2979. 19.5 72 2 "peugeot 504 (sw)" 26.0 4 96.00 69.00 2189. 18.0 72 2 "renault 12 (sw)" 22.0 4 122.0 86.00 2395. 16.0 72 1 "ford pinto (sw)" 28.0 4 97.00 92.00 2288. 17.0 72 3 "datsun 510 (sw)" 23.0 4 120.0 97.00 2506. 14.5 72 3 "toyouta corona mark ii (sw)" 28.0 4 98.00 80.00 2164. 15.0 72 1 "dodge colt (sw)" 27.0 4 97.00 88.00 2100. 16.5 72 3 "toyota corolla 1600 (sw)" 13.0 8 350.0 175.0 4100. 13.0 73 1 "buick century 350" 14.0 8 304.0 150.0 3672. 11.5 73 1 "amc matador" 13.0 8 350.0 145.0 3988. 13.0 73 1 "chevrolet malibu" 14.0 8 302.0 137.0 4042. 14.5 73 1 "ford gran torino" 15.0 8 318.0 150.0 3777. 12.5 73 1 "dodge coronet custom" 12.0 8 429.0 198.0 4952. 11.5 73 1 "mercury marquis brougham" 13.0 8 400.0 150.0 4464. 12.0 73 1 "chevrolet caprice classic" 13.0 8 351.0 158.0 4363. 13.0 73 1 "ford ltd" 14.0 8 318.0 150.0 4237. 14.5 73 1 "plymouth fury gran sedan" 13.0 8 440.0 215.0 4735. 11.0 73 1 "chrysler new yorker brougham" 12.0 8 455.0 225.0 4951. 11.0 73 1 "buick electra 225 custom" 13.0 8 360.0 175.0 3821. 11.0 73 1 "amc ambassador brougham" 18.0 6 225.0 105.0 3121. 16.5 73 1 "plymouth valiant" 16.0 6 250.0 100.0 3278. 18.0 73 1 "chevrolet nova custom" 18.0 6 232.0 100.0 2945. 16.0 73 1 "amc hornet" 18.0 6 250.0 88.00 3021. 16.5 73 1 "ford maverick" 23.0 6 198.0 95.00 2904. 16.0 73 1 "plymouth duster" 26.0 4 97.00 46.00 1950. 21.0 73 2 "volkswagen super beetle" 11.0 8 400.0 150.0 4997. 14.0 73 1 "chevrolet impala" 12.0 8 400.0 167.0 4906. 12.5 73 1 "ford country" 13.0 8 360.0 170.0 4654. 13.0 73 1 "plymouth custom suburb" 12.0 8 350.0 180.0 4499. 12.5 73 1 "oldsmobile vista cruiser" 18.0 6 232.0 100.0 2789. 15.0 73 1 "amc gremlin" 20.0 4 97.00 88.00 2279. 
19.0 73 3 "toyota carina" 21.0 4 140.0 72.00 2401. 19.5 73 1 "chevrolet vega" 22.0 4 108.0 94.00 2379. 16.5 73 3 "datsun 610" 18.0 3 70.00 90.00 2124. 13.5 73 3 "maxda rx3" 19.0 4 122.0 85.00 2310. 18.5 73 1 "ford pinto" 21.0 6 155.0 107.0 2472. 14.0 73 1 "mercury capri v6" 26.0 4 98.00 90.00 2265. 15.5 73 2 "fiat 124 sport coupe" 15.0 8 350.0 145.0 4082. 13.0 73 1 "chevrolet monte carlo s" 16.0 8 400.0 230.0 4278. 9.50 73 1 "pontiac grand prix" 29.0 4 68.00 49.00 1867. 19.5 73 2 "fiat 128" 24.0 4 116.0 75.00 2158. 15.5 73 2 "opel manta" 20.0 4 114.0 91.00 2582. 14.0 73 2 "audi 100ls" 19.0 4 121.0 112.0 2868. 15.5 73 2 "volvo 144ea" 15.0 8 318.0 150.0 3399. 11.0 73 1 "dodge dart custom" 24.0 4 121.0 110.0 2660. 14.0 73 2 "saab 99le" 20.0 6 156.0 122.0 2807. 13.5 73 3 "toyota mark ii" 11.0 8 350.0 180.0 3664. 11.0 73 1 "oldsmobile omega" 20.0 6 198.0 95.00 3102. 16.5 74 1 "plymouth duster" 21.0 6 200.0 ? 2875. 17.0 74 1 "ford maverick" 19.0 6 232.0 100.0 2901. 16.0 74 1 "amc hornet" 15.0 6 250.0 100.0 3336. 17.0 74 1 "chevrolet nova" 31.0 4 79.00 67.00 1950. 19.0 74 3 "datsun b210" 26.0 4 122.0 80.00 2451. 16.5 74 1 "ford pinto" 32.0 4 71.00 65.00 1836. 21.0 74 3 "toyota corolla 1200" 25.0 4 140.0 75.00 2542. 17.0 74 1 "chevrolet vega" 16.0 6 250.0 100.0 3781. 17.0 74 1 "chevrolet chevelle malibu classic" 16.0 6 258.0 110.0 3632. 18.0 74 1 "amc matador" 18.0 6 225.0 105.0 3613. 16.5 74 1 "plymouth satellite sebring" 16.0 8 302.0 140.0 4141. 14.0 74 1 "ford gran torino" 13.0 8 350.0 150.0 4699. 14.5 74 1 "buick century luxus (sw)" 14.0 8 318.0 150.0 4457. 13.5 74 1 "dodge coronet custom (sw)" 14.0 8 302.0 140.0 4638. 16.0 74 1 "ford gran torino (sw)" 14.0 8 304.0 150.0 4257. 15.5 74 1 "amc matador (sw)" 29.0 4 98.00 83.00 2219. 16.5 74 2 "audi fox" 26.0 4 79.00 67.00 1963. 15.5 74 2 "volkswagen dasher" 26.0 4 97.00 78.00 2300. 14.5 74 2 "opel manta" 31.0 4 76.00 52.00 1649. 16.5 74 3 "toyota corona" 32.0 4 83.00 61.00 2003. 
19.0 74 3 "datsun 710" 28.0 4 90.00 75.00 2125. 14.5 74 1 "dodge colt" 24.0 4 90.00 75.00 2108. 15.5 74 2 "fiat 128" 26.0 4 116.0 75.00 2246. 14.0 74 2 "fiat 124 tc" 24.0 4 120.0 97.00 2489. 15.0 74 3 "honda civic" 26.0 4 108.0 93.00 2391. 15.5 74 3 "subaru" 31.0 4 79.00 67.00 2000. 16.0 74 2 "fiat x1.9" 19.0 6 225.0 95.00 3264. 16.0 75 1 "plymouth valiant custom" 18.0 6 250.0 105.0 3459. 16.0 75 1 "chevrolet nova" 15.0 6 250.0 72.00 3432. 21.0 75 1 "mercury monarch" 15.0 6 250.0 72.00 3158. 19.5 75 1 "ford maverick" 16.0 8 400.0 170.0 4668. 11.5 75 1 "pontiac catalina" 15.0 8 350.0 145.0 4440. 14.0 75 1 "chevrolet bel air" 16.0 8 318.0 150.0 4498. 14.5 75 1 "plymouth grand fury" 14.0 8 351.0 148.0 4657. 13.5 75 1 "ford ltd" 17.0 6 231.0 110.0 3907. 21.0 75 1 "buick century" 16.0 6 250.0 105.0 3897. 18.5 75 1 "chevroelt chevelle malibu" 15.0 6 258.0 110.0 3730. 19.0 75 1 "amc matador" 18.0 6 225.0 95.00 3785. 19.0 75 1 "plymouth fury" 21.0 6 231.0 110.0 3039. 15.0 75 1 "buick skyhawk" 20.0 8 262.0 110.0 3221. 13.5 75 1 "chevrolet monza 2+2" 13.0 8 302.0 129.0 3169. 12.0 75 1 "ford mustang ii" 29.0 4 97.00 75.00 2171. 16.0 75 3 "toyota corolla" 23.0 4 140.0 83.00 2639. 17.0 75 1 "ford pinto" 20.0 6 232.0 100.0 2914. 16.0 75 1 "amc gremlin" 23.0 4 140.0 78.00 2592. 18.5 75 1 "pontiac astro" 24.0 4 134.0 96.00 2702. 13.5 75 3 "toyota corona" 25.0 4 90.00 71.00 2223. 16.5 75 2 "volkswagen dasher" 24.0 4 119.0 97.00 2545. 17.0 75 3 "datsun 710" 18.0 6 171.0 97.00 2984. 14.5 75 1 "ford pinto" 29.0 4 90.00 70.00 1937. 14.0 75 2 "volkswagen rabbit" 19.0 6 232.0 90.00 3211. 17.0 75 1 "amc pacer" 23.0 4 115.0 95.00 2694. 15.0 75 2 "audi 100ls" 23.0 4 120.0 88.00 2957. 17.0 75 2 "peugeot 504" 22.0 4 121.0 98.00 2945. 14.5 75 2 "volvo 244dl" 25.0 4 121.0 115.0 2671. 13.5 75 2 "saab 99le" 33.0 4 91.00 53.00 1795. 17.5 75 3 "honda civic cvcc" 28.0 4 107.0 86.00 2464. 15.5 76 2 "fiat 131" 25.0 4 116.0 81.00 2220. 16.9 76 2 "opel 1900" 25.0 4 140.0 92.00 2572. 
14.9 76 1 "capri ii" 26.0 4 98.00 79.00 2255. 17.7 76 1 "dodge colt" 27.0 4 101.0 83.00 2202. 15.3 76 2 "renault 12tl" 17.5 8 305.0 140.0 4215. 13.0 76 1 "chevrolet chevelle malibu classic" 16.0 8 318.0 150.0 4190. 13.0 76 1 "dodge coronet brougham" 15.5 8 304.0 120.0 3962. 13.9 76 1 "amc matador" 14.5 8 351.0 152.0 4215. 12.8 76 1 "ford gran torino" 22.0 6 225.0 100.0 3233. 15.4 76 1 "plymouth valiant" 22.0 6 250.0 105.0 3353. 14.5 76 1 "chevrolet nova" 24.0 6 200.0 81.00 3012. 17.6 76 1 "ford maverick" 22.5 6 232.0 90.00 3085. 17.6 76 1 "amc hornet" 29.0 4 85.00 52.00 2035. 22.2 76 1 "chevrolet chevette" 24.5 4 98.00 60.00 2164. 22.1 76 1 "chevrolet woody" 29.0 4 90.00 70.00 1937. 14.2 76 2 "vw rabbit" 33.0 4 91.00 53.00 1795. 17.4 76 3 "honda civic" 20.0 6 225.0 100.0 3651. 17.7 76 1 "dodge aspen se" 18.0 6 250.0 78.00 3574. 21.0 76 1 "ford granada ghia" 18.5 6 250.0 110.0 3645. 16.2 76 1 "pontiac ventura sj" 17.5 6 258.0 95.00 3193. 17.8 76 1 "amc pacer d/l" 29.5 4 97.00 71.00 1825. 12.2 76 2 "volkswagen rabbit" 32.0 4 85.00 70.00 1990. 17.0 76 3 "datsun b-210" 28.0 4 97.00 75.00 2155. 16.4 76 3 "toyota corolla" 26.5 4 140.0 72.00 2565. 13.6 76 1 "ford pinto" 20.0 4 130.0 102.0 3150. 15.7 76 2 "volvo 245" 13.0 8 318.0 150.0 3940. 13.2 76 1 "plymouth volare premier v8" 19.0 4 120.0 88.00 3270. 21.9 76 2 "peugeot 504" 19.0 6 156.0 108.0 2930. 15.5 76 3 "toyota mark ii" 16.5 6 168.0 120.0 3820. 16.7 76 2 "mercedes-benz 280s" 16.5 8 350.0 180.0 4380. 12.1 76 1 "cadillac seville" 13.0 8 350.0 145.0 4055. 12.0 76 1 "chevy c10" 13.0 8 302.0 130.0 3870. 15.0 76 1 "ford f108" 13.0 8 318.0 150.0 3755. 14.0 76 1 "dodge d100" 31.5 4 98.00 68.00 2045. 18.5 77 3 "honda accord cvcc" 30.0 4 111.0 80.00 2155. 14.8 77 1 "buick opel isuzu deluxe" 36.0 4 79.00 58.00 1825. 18.6 77 2 "renault 5 gtl" 25.5 4 122.0 96.00 2300. 15.5 77 1 "plymouth arrow gs" 33.5 4 85.00 70.00 1945. 16.8 77 3 "datsun f-10 hatchback" 17.5 8 305.0 145.0 3880. 
12.5 77 1 "chevrolet caprice classic" 17.0 8 260.0 110.0 4060. 19.0 77 1 "oldsmobile cutlass supreme" 15.5 8 318.0 145.0 4140. 13.7 77 1 "dodge monaco brougham" 15.0 8 302.0 130.0 4295. 14.9 77 1 "mercury cougar brougham" 17.5 6 250.0 110.0 3520. 16.4 77 1 "chevrolet concours" 20.5 6 231.0 105.0 3425. 16.9 77 1 "buick skylark" 19.0 6 225.0 100.0 3630. 17.7 77 1 "plymouth volare custom" 18.5 6 250.0 98.00 3525. 19.0 77 1 "ford granada" 16.0 8 400.0 180.0 4220. 11.1 77 1 "pontiac grand prix lj" 15.5 8 350.0 170.0 4165. 11.4 77 1 "chevrolet monte carlo landau" 15.5 8 400.0 190.0 4325. 12.2 77 1 "chrysler cordoba" 16.0 8 351.0 149.0 4335. 14.5 77 1 "ford thunderbird" 29.0 4 97.00 78.00 1940. 14.5 77 2 "volkswage
- 加载数据集
# Load the raw Auto MPG table. "?" is read as a missing value and anything
# after a tab character on a line is ignored (comment='\t').
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
    'Acceleration', 'Model Year', 'Origin',
]
read_options = dict(
    names=column_names,
    na_values="?",
    comment='\t',
    sep=" ",
    skipinitialspace=True,
)
raw_dataset = pd.read_csv('auto-mpg.data', **read_options)
print(raw_dataset)
- 数据清洗
统计数据集中各列中空值的个数,并删除包含空值的行。
- 将Origin列转换为one-hot(独热)编码。
- 数据探索
- 使用describe方法查看数据的统计指标
- 使用seaborn库中pairplot方法绘制"MPG", "Cylinders", "Displacement", "Weight"四列的联合分布图
# Data cleaning: report the number of missing values per column, then drop
# the incomplete rows. .copy() makes `dataset` an independent frame so the
# column assignment below does not trigger SettingWithCopyWarning.
print(raw_dataset.isna().sum())
dataset = raw_dataset.dropna().copy()

# Convert Origin to one-hot columns. dtype=float keeps the indicator columns
# numeric, so describe() and the arithmetic normalisation include them
# (newer pandas versions default to bool, which describe() skips).
dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='', dtype=float)

# Data exploration: summary statistics and pairwise joint distributions.
print(dataset.describe())
sns.pairplot(dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']], diag_kind='kde')
- 数据标准化
# Standardisation: split off the MPG target, then z-score every feature column.
labels = dataset.pop('MPG')
train_stats = dataset.describe().transpose()


def norm(x):
    """Centre ``x`` by the 'mean' column of ``train_stats`` and scale by 'std'."""
    centred = x - train_stats['mean']
    return centred / train_stats['std']


normed_dataset = norm(dataset)
- 划分训练集与测试集
# Split into a training set and a test set: 20% of the rows are held out,
# with a fixed random_state.
X_train, X_test, Y_train, Y_test = train_test_split(
    normed_dataset,
    labels,
    test_size=0.2,
    random_state=0,
)
- 模型构建
- 模型定义
# Build the regressor: three 64-unit ReLU layers (dropout after the first
# two) followed by a single linear output unit.
feature_count = X_train.shape[1]
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=[feature_count]))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1))
- 编译模型
loss='mse' #损失用mse
optimizer='adam'
metrics=['mae', 'mse']
# Compile: optimise MSE with Adam, and track MAE and MSE as metrics.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae', 'mse'],
)
- 输出模型参数
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line.
model.summary()
- 模型训练
- 训练
epochs=100,
validation_split = 0.2
verbose=1
# Train for 100 epochs (the epoch count stated for this exercise and used in
# the complete listing), holding out part of the training data for validation.
history = model.fit(X_train, Y_train, epochs=100, validation_split=0.3, verbose=1)
- 获取训练历史数据中的各指标值
# Pull the per-epoch training and validation metrics out of the history record.
epoch_log = history.history
mae, val_mae = epoch_log['mae'], epoch_log['val_mae']
mse, val_mse = epoch_log['mse'], epoch_log['val_mse']
- 绘制指标在训练过程中的变化图
# One figure per metric (figure 1: MAE, figure 2: MSE); both are shown together.
for figure_id, metric, train_curve, val_curve in (
    (1, 'MAE', mae, val_mae),
    (2, 'MSE', mse, val_mse),
):
    plt.figure(figure_id)
    plt.plot(train_curve, label=f'Training {metric}')
    plt.plot(val_curve, label=f'Validation {metric}')
    plt.title(f'Training and Validation {metric}')
    plt.legend()
plt.show()
- 模型评估
使用测试集对模型进行评估
# Evaluate once on the held-out test set and print the result. The original
# ran evaluate() twice and threw the first result away.
h1 = model.evaluate(X_test, Y_test, verbose=1)
print(h1)
完整代码
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the raw Auto MPG table. "?" is read as a missing value and anything
# after a tab character on a line is ignored (comment='\t').
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
    'Acceleration', 'Model Year', 'Origin',
]
read_options = dict(
    names=column_names,
    na_values="?",
    comment='\t',
    sep=" ",
    skipinitialspace=True,
)
raw_dataset = pd.read_csv('auto-mpg.data', **read_options)
print(raw_dataset)
# Data cleaning: report the number of missing values per column, then drop
# the incomplete rows. .copy() makes `dataset` an independent frame so the
# column assignment below does not trigger SettingWithCopyWarning.
print(raw_dataset.isna().sum())
dataset = raw_dataset.dropna().copy()

# Convert Origin to one-hot columns. dtype=float keeps the indicator columns
# numeric, so describe() and the arithmetic normalisation include them
# (newer pandas versions default to bool, which describe() skips).
dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='', dtype=float)

# Data exploration: summary statistics and pairwise joint distributions.
print(dataset.describe())
sns.pairplot(dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']], diag_kind='kde')
# Separate the regression target from the feature columns.
labels = dataset.pop('MPG')

# Split BEFORE computing the scaling statistics so the held-out test rows
# never influence the mean/std used for standardisation (no test leakage).
X_train, X_test, Y_train, Y_test = train_test_split(dataset, labels, test_size=0.2, random_state=0)
train_stats = X_train.describe().transpose()


def norm(x):
    """Standardise ``x`` column-wise with the training-set mean and std."""
    return (x - train_stats['mean']) / train_stats['std']


X_train = norm(X_train)
X_test = norm(X_test)
# Kept so that any code expecting the fully normalised table still finds it.
normed_dataset = norm(dataset)
# Build the regressor: three 64-unit ReLU layers (dropout after the first
# two) followed by a single linear output unit.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=[X_train.shape[1]]),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1),
])

# Compile: optimise MSE with Adam, and track MAE and MSE as metrics.
model.compile(loss='mse', optimizer='adam', metrics=['mae', 'mse'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line.
model.summary()
# Train, holding out 30% of the training rows for validation.
history = model.fit(
    X_train,
    Y_train,
    epochs=100,
    validation_split=0.3,
    verbose=1,
)

# Pull the per-epoch training and validation metrics out of the history record.
epoch_log = history.history
mae, val_mae = epoch_log['mae'], epoch_log['val_mae']
mse, val_mse = epoch_log['mse'], epoch_log['val_mse']
# Show the MAE figure, then the MSE figure, each in its own window.
for metric, train_curve, val_curve in (
    ('MAE', mae, val_mae),
    ('MSE', mse, val_mse),
):
    plt.figure()
    plt.plot(train_curve, label=f'Training {metric}')
    plt.plot(val_curve, label=f'Validation {metric}')
    plt.title(f'Training and Validation {metric}')
    plt.legend()
    plt.show()
# Evaluate on the held-out test set and print the returned values.
h1 = model.evaluate(x=X_test, y=Y_test, verbose=1)
print(h1)
4. 循环神经网络(LSTM)情感分类(5-4)
- 数据获取与预处理
IMDB数据集,有5万条来自网络电影数据库的评论,其中25000条用来训练,25000条用来测试,每个部分正负评论各占50%。和MNIST数据集类似,IMDB数据集也集成在Keras中,同时经过了预处理:电影评论转换成了一系列数字,每个数字代表字典中的一个单词(表示该单词出现频率的排名)。
- 读取数据
# Load the reviews, already encoded as integers (one per dictionary word);
# num_words=10000 limits the vocabulary.
train_split, test_split = imdb.load_data(num_words=10000)
x_train, y_train = train_split
x_test, y_test = test_split
- 预处理
# The recurrent network expects inputs of one fixed length, otherwise it
# fails at run time. pad_sequences brings every review to maxlen=100 tokens.
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences
x_train = pad_sequences(x_train, maxlen=100)
x_test = pad_sequences(x_test, maxlen=100)
- 模型搭建
- 模型定义
# Embedding layer: 10000 rows (vocabulary size) x 128 columns (vector size
# per word), fed with sequences of length 100.
embedding_layer = Embedding(10000, 128, input_length=100)
# LSTM hidden layer with input and recurrent dropout.
lstm_layer = LSTM(128, dropout=0.2, recurrent_dropout=0.2)
# Output layer: a single sigmoid unit.
output_layer = Dense(1, activation='sigmoid')
model = Sequential([embedding_layer, lstm_layer, output_layer])
- 编译模型
# Binary cross-entropy loss, Adam optimiser, accuracy as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
- 模型训练
# Train the model, holding out 20% of the training data for validation.
fit_options = dict(epochs=20, batch_size=32, validation_split=0.2, verbose=1)
history = model.fit(x_train, y_train, **fit_options)
- 训练
epochs=5,
validation_split = 0.2
verbose=1
- 获取训练历史数据中的各指标值
- 绘制指标在训练过程中的变化图
- 模型评估
使用测试集对模型进行评估
def plot_history(history):
    """Plot training/validation accuracy and loss curves from a Keras History.

    The original snippet had lost this function header and the metric
    extraction, leaving ``epochs``, ``acc``, ``val_acc``, ``loss`` and
    ``val_loss`` undefined and the ``plot_history(history)`` call unresolved.
    """
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    # 1-based epoch numbers for the x axis.
    epochs = range(1, len(acc) + 1)

    plt.figure(figsize=(12, 6))
    # Accuracy curves.
    plt.subplot(1, 2, 1)
    plt.plot(epochs, acc, 'b', label='Training accuracy')
    plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    # Loss curves.
    plt.subplot(1, 2, 2)
    plt.plot(epochs, loss, 'b', label='Training Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()


plot_history(history)
# Evaluate on the test set and report loss and accuracy.
evaluation = model.evaluate(x_test, y_test, verbose=2)
test_loss, test_acc = evaluation
print(f'Test loss: {test_loss}')
print(f'Test accuracy: {test_acc}')
完整代码:
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import matplotlib.pyplot as plt
# Load the integer-encoded reviews, limited to a 10000-word vocabulary.
train_split, test_split = imdb.load_data(num_words=10000)
x_train, y_train = train_split
x_test, y_test = test_split

# Preprocessing: bring every review to a fixed length of 100 tokens.
x_train = pad_sequences(x_train, maxlen=100)
x_test = pad_sequences(x_test, maxlen=100)
for padded in (x_train, x_test):
    print(padded.shape)
# Model definition. The input shape (100 token ids per review) is declared
# on an explicit Input at the front of the stack. The original put
# input_shape on the LSTM, which is not the first layer, so the model had
# no declared input when summary() was called.
model = Sequential([
    tf.keras.Input(shape=(100,)),
    Embedding(10000, 128),                          # 10000-word vocabulary, 128-d vectors
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),  # recurrent hidden layer
    Dense(1, activation='sigmoid'),                 # single sigmoid output unit
])

# Compile: binary cross-entropy loss, Adam optimiser, accuracy metric.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
# Train the model, holding out 20% of the training data for validation.
fit_options = dict(epochs=20, batch_size=32, validation_split=0.2, verbose=1)
history = model.fit(x_train, y_train, **fit_options)
# Plot how the metrics change during training.
def plot_history(history):
    """Plot training/validation accuracy and loss curves from a Keras History."""
    record = history.history
    acc, val_acc = record['accuracy'], record['val_accuracy']
    loss, val_loss = record['loss'], record['val_loss']
    # 1-based epoch numbers for the x axis.
    epochs = range(1, len(acc) + 1)

    # (train curve, validation curve, train label, validation label, title, y label)
    panels = (
        (acc, val_acc, 'Training accuracy', 'Validation accuracy',
         'Training and validation accuracy', 'Accuracy'),
        (loss, val_loss, 'Training Loss', 'Validation Loss',
         'Training and validation loss', 'Loss'),
    )
    plt.figure(figsize=(12, 6))
    for slot, panel in enumerate(panels, start=1):
        train_values, val_values, train_label, val_label, title, y_label = panel
        plt.subplot(1, 2, slot)
        plt.plot(epochs, train_values, 'b', label=train_label)
        plt.plot(epochs, val_values, 'r', label=val_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()
    plt.show()


plot_history(history)
# Evaluate on the test set and report loss and accuracy.
evaluation = model.evaluate(x_test, y_test, verbose=2)
test_loss, test_acc = evaluation
print(f'Test loss: {test_loss}')
print(f'Test accuracy: {test_acc}')
原文地址:https://blog.csdn.net/weixin_66547608/article/details/139880768
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!