library(ggplot2)
theme_set(theme_bw(base_family = "STKaiti"))
###################################
## Note: before loading the keras deep learning package in R and calling its functions, you may need to set up the TensorFlow environment first. One way to do this: install Anaconda → in the Mac "Launchpad", find "Other" and open "Terminal" → type the command pip install tensorflow and press Enter; once the download finishes you are done. Detailed instructions are in the usage notes in the Program folder.
###################################
## The TensorFlow environment can also be installed with functions from the tensorflow package
# library(tensorflow)
# install_tensorflow()
## Computer vision image classification: Fashion-MNIST classification
library(keras)
# install_keras()
library(caret)
## Loading required package: lattice
#####################
## The function dataset_fashion_mnist() below automatically downloads the fashion-mnist dataset. If the download is slow or fails, copy the pre-downloaded fashion-mnist folder from the Program\Other folder into ~/.keras/datasets under your home directory; the code will then load it automatically.
####################
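## A minimal sketch (the source path is an assumption; adjust to your local
## copy) of placing the pre-downloaded folder where keras looks for datasets:
# datasets_dir <- path.expand("~/.keras/datasets")
# dir.create(datasets_dir, recursive = TRUE, showWarnings = FALSE)
# file.copy("Program/Other/fashion-mnist", datasets_dir, recursive = TRUE)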
fashion <- dataset_fashion_mnist()
## Data preprocessing: scale pixel values to [0, 1] and one-hot encode the labels
x_train <- fashion$train$x / 255.0
y_train <- fashion$train$y
x_test <- fashion$test$x / 255.0
y_test <- fashion$test$y
y_train <- to_categorical(y_train, num_classes = 10)
y_test <- to_categorical(y_test, num_classes = 10)
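## A small illustration of what to_categorical() does: labels c(0, 2, 1) with
## num_classes = 3 become the rows of a one-hot matrix:
# to_categorical(c(0, 2, 1), num_classes = 3)
##      [,1] [,2] [,3]
## [1,]    1    0    0
## [2,]    0    0    1
## [3,]    0    1    0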
dim(x_train)
## [1] 60000 28 28
dim(x_test)
## [1] 10000 28 28
dim(y_train)
## [1] 60000 10
class_names = c('T-shirt','Trouser','Pullover','Dress','Coat', 'Sandal',
'Shirt','Sneaker','Bag','Ankle boot')
## Reshape the image arrays to add a channel dimension (28 x 28 -> 28 x 28 x 1)
dim(x_train) <- c(nrow(x_train),28,28,1)
dim(x_test) <- c(nrow(x_test),28,28,1)
## Build the LeNet-5 network
inputs <- layer_input(shape = c(28,28,1))
prediction <- inputs%>%
layer_batch_normalization(name="norm")%>%
layer_zero_padding_2d(padding = c(2, 2))%>%
layer_conv_2d(6,kernel_size = c(5,5),strides = c(1,1),
padding="valid",activation = "tanh",name="conv1")%>%
layer_max_pooling_2d(pool_size = c(2, 2),strides = c(2,2),
name ="pool1" )%>%
layer_conv_2d(16,kernel_size = c(5,5),strides = c(1,1),
padding="valid",activation = "tanh",name="conv2")%>%
layer_max_pooling_2d(pool_size = c(2, 2),strides = c(2,2),
name ='pool2' )%>%
layer_flatten()%>%
layer_dense(120,activation="tanh",name="fc1")%>%
layer_dense(84,activation="tanh",name="fc2")%>%
layer_dense(10,activation='softmax',name="soft")
model <- keras_model(inputs = inputs,outputs = prediction)
summary(model)
## Model: "model"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## input_1 (InputLayer) [(None, 28, 28, 1)] 0
## ________________________________________________________________________________
## norm (BatchNormalization) (None, 28, 28, 1) 4
## ________________________________________________________________________________
## zero_padding2d (ZeroPadding2D) (None, 32, 32, 1) 0
## ________________________________________________________________________________
## conv1 (Conv2D) (None, 28, 28, 6) 156
## ________________________________________________________________________________
## pool1 (MaxPooling2D) (None, 14, 14, 6) 0
## ________________________________________________________________________________
## conv2 (Conv2D) (None, 10, 10, 16) 2416
## ________________________________________________________________________________
## pool2 (MaxPooling2D) (None, 5, 5, 16) 0
## ________________________________________________________________________________
## flatten (Flatten) (None, 400) 0
## ________________________________________________________________________________
## fc1 (Dense) (None, 120) 48120
## ________________________________________________________________________________
## fc2 (Dense) (None, 84) 10164
## ________________________________________________________________________________
## soft (Dense) (None, 10) 850
## ================================================================================
## Total params: 61,710
## Trainable params: 61,708
## Non-trainable params: 2
## ________________________________________________________________________________
## compile model
model %>% compile(
optimizer = 'rmsprop',
loss = 'categorical_crossentropy',
metrics = c('accuracy')
)
## Train the model, holding out 20% of the data as a validation set
## Optional early-stopping condition for training:
# call_es <- callback_early_stopping(monitor = "val_loss",min_delta = 1e-6)
fit_history <- model %>%
fit(x = x_train, y = y_train,batch_size =128, epochs = 10,
validation_split = 0.2)
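## To enable early stopping, the callback above would be passed to fit() via
## the callbacks argument; a sketch (not run here):
# fit_history <- model %>%
#   fit(x = x_train, y = y_train, batch_size = 128, epochs = 10,
#       validation_split = 0.2, callbacks = list(call_es))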
## Visualize the training process
plot(fit_history)+theme_bw(base_family = "STKaiti")+
geom_point(aes(shape=data),size=3)+
ggtitle("LeNet-5模型")
## Evaluate the model's accuracy on the test set
model %>% evaluate(x_test,y_test)
## $loss
## [1] 0.3044203
##
## $accuracy
## [1] 0.8942
test_pre <- model %>% predict(x_test)
## which.max() gives a 1-based index, so subtract 1 to recover the 0-9 class labels
test_preclass <- as.factor(apply(test_pre,1,which.max) - 1)
confusionMatrix(test_preclass,as.factor(fashion$test$y))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1 2 3 4 5 6 7 8 9
## 0 884 1 23 10 2 0 144 0 0 0
## 1 1 981 1 16 1 0 1 0 0 0
## 2 12 1 874 10 85 0 77 0 3 0
## 3 27 12 13 920 56 0 33 0 5 0
## 4 0 1 37 15 771 0 65 0 1 0
## 5 2 0 0 0 0 966 0 20 1 8
## 6 61 2 49 22 81 0 668 0 7 1
## 7 0 0 0 0 0 21 0 969 4 59
## 8 13 2 3 7 4 6 12 2 978 1
## 9 0 0 0 0 0 7 0 9 1 931
##
## Overall Statistics
##
## Accuracy : 0.8942
## 95% CI : (0.888, 0.9002)
## No Information Rate : 0.1
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8824
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
## Sensitivity 0.8840 0.9810 0.8740 0.9200 0.7710 0.9660
## Specificity 0.9800 0.9978 0.9791 0.9838 0.9868 0.9966
## Pos Pred Value 0.8308 0.9800 0.8230 0.8630 0.8663 0.9689
## Neg Pred Value 0.9870 0.9979 0.9859 0.9910 0.9749 0.9962
## Prevalence 0.1000 0.1000 0.1000 0.1000 0.1000 0.1000
## Detection Rate 0.0884 0.0981 0.0874 0.0920 0.0771 0.0966
## Detection Prevalence 0.1064 0.1001 0.1062 0.1066 0.0890 0.0997
## Balanced Accuracy 0.9320 0.9894 0.9266 0.9519 0.8789 0.9813
## Class: 6 Class: 7 Class: 8 Class: 9
## Sensitivity 0.6680 0.9690 0.9780 0.9310
## Specificity 0.9752 0.9907 0.9944 0.9981
## Pos Pred Value 0.7497 0.9202 0.9514 0.9821
## Neg Pred Value 0.9636 0.9965 0.9975 0.9924
## Prevalence 0.1000 0.1000 0.1000 0.1000
## Detection Rate 0.0668 0.0969 0.0978 0.0931
## Detection Prevalence 0.0891 0.1053 0.1028 0.0948
## Balanced Accuracy 0.8216 0.9798 0.9862 0.9646
## Visualize which predictions are correct with a heat map of the confusion matrix
confum <- confusionMatrix(test_preclass,as.factor(fashion$test$y))
confumat <- as.data.frame(confum$table)
confumat[,1:2] <- apply(confumat[,1:2],2,as.integer)
ggplot(confumat,aes(x=Reference,y = Prediction))+
geom_tile(aes(fill = Freq))+
geom_text(aes(label = Freq))+
scale_x_continuous(breaks = c(0:9),labels = class_names)+
scale_y_continuous(breaks = unique(confumat$Prediction),
trans = "reverse",labels = class_names)+
scale_fill_gradient2(low="darkblue", high="lightgreen",
guide="colorbar")+
ggtitle("Lenet-5分类器在测试集的结果")
## The classifier most often confuses Shirt with T-shirt
## One sample from the training data
testim <- x_train[18,,,]
par(pty = c("s"))
image(testim,xaxt= "n", yaxt= "n")
## Get the output of the first convolutional layer
dim(testim) <- c(1,28,28,1)
layer_name <- "conv1"
media_layer_model <- keras_model(inputs = model$input,
outputs = get_layer(model, layer_name)$output)
media_output <- predict(media_layer_model, testim)
dim(media_output)
## [1] 1 28 28 6
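## The sub-model shares its weights with the trained model, so predict()
## above returns the trained conv1 activations for this image.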
## Visualize the output of the first convolutional layer
convname <- paste("conv1_",1:dim(media_output)[4],sep = "")
par(mfrow = c(2,3),mai=c(0.1,0.1,0.15,0.1))
for(ii in 1:6){
image(media_output[1,,,ii],xaxt= "n", yaxt= "n",main = convname[ii])
}
## Get the output of the second convolutional layer
layer_name <- "conv2"
media_layer_model <- keras_model(inputs = model$input,
outputs = get_layer(model, layer_name)$output)
media_output <- predict(media_layer_model, testim)
dim(media_output)
## [1] 1 10 10 16
## Visualize the output of the second convolutional layer
convname <- paste("conv2_",1:dim(media_output)[4],sep = "")
par(mfrow = c(3,6),mai=c(0.05,0.05,0.15,0.05))
for(ii in 1:16){
image(media_output[1,,,ii],xaxt= "n", yaxt= "n",main = convname[ii])
}
## Data generators that standardize each image individually
## (samplewise_std_normalization); used to train the SELU variant of LeNet-5 below
image_gen <- image_data_generator(samplewise_std_normalization = TRUE,
                                  validation_split = 0.2)
xy_train_s <- flow_images_from_data(x_train,y=y_train,generator = image_gen,
batch_size = 128)
image_gen2 <- image_data_generator(samplewise_std_normalization = TRUE)
xy_test_s <- flow_images_from_data(x_test,y=y_test,generator = image_gen2,
batch_size = 128)
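## A quick sanity check (illustrative, not required): draw one batch from the
## generator to confirm the shapes before training.
# batch <- generator_next(xy_train_s)
# dim(batch[[1]])  # expected (128, 28, 28, 1)
# dim(batch[[2]])  # expected (128, 10)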
## Build the LeNet-5 network again, this time with SELU activations
inputs <- layer_input(shape = c(28,28,1))
prediction <- inputs%>%
layer_batch_normalization(name="norm")%>%
layer_zero_padding_2d(padding = c(2, 2))%>%
layer_conv_2d(6,kernel_size = c(5,5),strides = c(1,1),
padding="valid",activation = "selu",name="conv1")%>%
layer_max_pooling_2d(pool_size = c(2, 2),strides = c(2,2),
name ="pool1" )%>%
layer_conv_2d(16,kernel_size = c(5,5),strides = c(1,1),
padding="valid",activation = "selu",name="conv2")%>%
layer_max_pooling_2d(pool_size = c(2, 2),strides = c(2,2),
name ='pool2' )%>%
layer_flatten()%>%
layer_dense(120,activation="selu",
kernel_initializer = "lecun_normal",name="fc1")%>%
layer_dense(84,activation="selu",
kernel_initializer = "lecun_normal",name="fc2")%>%
layer_dense(10,activation='softmax',name="soft")
model <- keras_model(inputs = inputs,outputs = prediction)
summary(model)
## Model: "model_3"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## input_2 (InputLayer) [(None, 28, 28, 1)] 0
## ________________________________________________________________________________
## norm (BatchNormalization) (None, 28, 28, 1) 4
## ________________________________________________________________________________
## zero_padding2d_1 (ZeroPadding2D) (None, 32, 32, 1) 0
## ________________________________________________________________________________
## conv1 (Conv2D) (None, 28, 28, 6) 156
## ________________________________________________________________________________
## pool1 (MaxPooling2D) (None, 14, 14, 6) 0
## ________________________________________________________________________________
## conv2 (Conv2D) (None, 10, 10, 16) 2416
## ________________________________________________________________________________
## pool2 (MaxPooling2D) (None, 5, 5, 16) 0
## ________________________________________________________________________________
## flatten_1 (Flatten) (None, 400) 0
## ________________________________________________________________________________
## fc1 (Dense) (None, 120) 48120
## ________________________________________________________________________________
## fc2 (Dense) (None, 84) 10164
## ________________________________________________________________________________
## soft (Dense) (None, 10) 850
## ================================================================================
## Total params: 61,710
## Trainable params: 61,708
## Non-trainable params: 2
## ________________________________________________________________________________
## compile model
model %>% compile(
optimizer = 'rmsprop',
loss = 'categorical_crossentropy',
metrics = c('accuracy')
)
## Train the model, holding out 20% of the data as a validation set
## Optional early-stopping condition for training:
# call_es <- callback_early_stopping(monitor = "val_loss",min_delta = 1e-6)
######## Running the next line may require the Python scipy library; install it by typing pip install scipy in the Terminal
fit_history <- model %>%
fit_generator(generator = xy_train_s, steps_per_epoch = 500,epochs = 20)
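## Note: in recent versions of the keras package, fit_generator() (and
## evaluate_generator() below) are deprecated; depending on your installed
## version, fit(xy_train_s, steps_per_epoch = 500, epochs = 20) and
## evaluate(xy_test_s, steps = 80) are the equivalent calls.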
## Visualize the training process
plot(fit_history)+theme_bw(base_family = "STKaiti")+
geom_point(aes(shape=data),size=3)+
ggtitle("LeNet-5模型")
## Evaluate the model's accuracy on the test set
model %>% evaluate_generator(xy_test_s,steps = 80)
## $loss
## [1] 0.4634951
##
## $accuracy
## [1] 0.8964257
## The raw text corpus is a subset of THUCNews. It contains 10 classes with
## 6500 documents per class, split into three datasets: a training set, a
## validation set, and a test set.
## The split is:
## training set: 5000 * 10
## validation set: 500 * 10
## test set: 1000 * 10
library(readr)
## Read the data and preprocess it
train_data = read_csv("data/chap12/cnews/cnews_train.csv")
## Parsed with column specification:
## cols(
## label = col_character(),
## text = col_character(),
## cutword = col_character(),
## cutwordnum = col_double()
## )
val_data = read_csv("data/chap12/cnews/cnews_val.csv")
## Parsed with column specification:
## cols(
## label = col_character(),
## text = col_character(),
## cutword = col_character(),
## cutwordnum = col_double()
## )
test_data = read_csv("data/chap12/cnews/cnews_test.csv")
## Parsed with column specification:
## cols(
## label = col_character(),
## text = col_character(),
## cutword = col_character(),
## cutwordnum = col_double()
## )
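## A quick check (illustrative) that the row counts match the documented
## split of 50000 / 5000 / 10000:
# c(nrow(train_data), nrow(val_data), nrow(test_data))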
head(train_data)
## # A tibble: 6 x 4
## label text cutword cutwordnum
## <chr> <chr> <chr> <dbl>
## 1 体育 马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨… 马晓旭 意外 受伤 国奥 警惕 无奈 大雨 格外 青睐 殷… 259
## 2 体育 商瑞华首战复仇心切 中国玫瑰要用美国方式攻克瑞典多曼来了,… 商瑞华 首战 复仇 心切 中国 玫瑰 美国 方式 攻克 瑞… 526
## 3 体育 冠军球队迎新欢乐派对 黄旭获大奖张军赢下PK赛新浪体育讯1… 冠军 球队 迎新 欢乐 派对 黄旭获 大奖 张军 PK 新… 610
## 4 体育 辽足签约危机引注册难关 高层威逼利诱合同笑里藏刀新浪体育讯… 辽足 签约 危机 注册 难关 高层 威逼利诱 合同 笑里藏… 524
## 5 体育 揭秘谢亚龙被带走:总局电话骗局 复制南杨轨迹体坛周报特约记… 揭秘 谢亚龙 带走 总局 电话 骗局 复制 南杨 轨迹 体… 282
## 6 体育 阿的江:八一需重新定位 我们有机会但该进的没进新浪体育讯1… 阿的江 八一 重新 定位 我们 机会 没进 新浪 体育讯 … 332
## The dataset has already been preprocessed. Each file contains 4 columns: column 1 holds the labels, column 2 the raw news text, column 3 the text after word segmentation and stop-word removal with the tokens joined by spaces, and column 4 the number of tokens.
## Examine the distribution of the number of tokens per news article
hist(train_data$cutwordnum,breaks = 50)
summary(train_data$cutwordnum)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 115.0 226.0 300.5 382.0 9083.0
## The shortest document has 4 tokens and the longest has 9083; the mean length is about 300 and the median is 226.
table(train_data$label)
##
## 财经 房产 家居 教育 科技 时尚 时政 体育 游戏 娱乐
## 5000 5000 5000 5000 5000 5000 5000 5000 5000 5000
## The dataset covers the classes finance, real estate, home, education, technology, fashion, politics, sports, games, and entertainment
labelname <- names(table(train_data$label))
labelname
## [1] "财经" "房产" "家居" "教育" "科技" "时尚" "时政" "体育" "游戏" "娱乐"
## Recode the labels: map each class name to an integer 0-9, then one-hot encode
train_lab1 <- factor(train_data$label,levels = labelname,labels = 0:9)
val_lab1 <- factor(val_data$label,levels = labelname,labels = 0:9)
test_lab1 <- factor(test_data$label,levels = labelname,labels = 0:9)
train_lab <- to_categorical(train_lab1,num_classes = 10)
val_lab <- to_categorical(val_lab1,num_classes = 10)
test_lab <- to_categorical(test_lab1,num_classes = 10)
## Encode the tokens with a Tokenizer
## After creating a Tokenizer object, fit_text_tokenizer() splits each text on spaces
## and assigns each word an integer index: the more frequent the word, the smaller the index
max_words = 6000
max_len = 600
tok = text_tokenizer(num_words=max_words) ## cap the vocabulary at the 6000 most frequent words
tok_fit <- tok%>%fit_text_tokenizer(train_data$cutword)
## The word_index attribute shows the index assigned to each word
## The word_counts attribute shows the frequency of each word
unlist(tok_fit$word_index[1:10])
## 我们 一个 中国 可以 基金 没有 自己 他们 市场 这个
## 1 2 3 4 5 6 7 8 9 10
## unlist(tok_fit$word_counts[1:10])
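## A toy illustration (assumed input) of the frequency-based indexing:
# toy_tok <- text_tokenizer(num_words = 10) %>% fit_text_tokenizer("a b b c c c")
# unlist(toy_tok$word_index)  # c = 1, b = 2, a = 3: more frequent, smaller index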
## Convert the texts to index sequences with texts_to_sequences(), then bring every sequence to the same length with pad_sequences().
train_tok <- tok_fit%>%texts_to_sequences(train_data$cutword)%>%
pad_sequences(maxlen = max_len,padding = "pre",truncating = "post")
val_tok <- tok_fit%>%texts_to_sequences(val_data$cutword)%>%
pad_sequences(maxlen = max_len,padding = "pre",truncating = "post")
test_tok <- tok_fit%>%texts_to_sequences(test_data$cutword)%>%
pad_sequences(maxlen = max_len,padding = "pre",truncating = "post")
dim(train_tok)
## [1] 50000 600
dim(val_tok)
## [1] 5000 600
dim(test_tok)
## [1] 10000 600
train_tok[1:10,1:10]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0
## [7,] 0 0 0 0 0 0 0 0 0 0
## [8,] 0 0 0 0 0 0 0 0 0 0
## [9,] 2270 5097 223 181 1866 63 536 181 2270 528
## [10,] 0 0 0 0 0 0 0 0 0 0
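## With padding = "pre", documents shorter than max_len get leading zeros,
## which is why most rows above start with 0; row 9 starts with word indices
## because that document has at least 600 tokens (truncating = "post" drops
## the excess from the end).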
## Define the LSTM model
lstminputs <- layer_input(shape=max_len)
lstmprediction <- lstminputs%>%
layer_embedding(input_dim = max_words+1,output_dim = 64,
input_length = max_len)%>%
layer_lstm(128,activation = "tanh")%>%
layer_dropout(0.5)%>%
layer_dense(128,activation="relu",name="FC1")%>%
layer_dropout(0.5)%>%
layer_dense(64,activation="relu",name="FC2")%>%
layer_dense(10,activation="softmax",name="soft")
lstmmodel <- keras_model(inputs = lstminputs,outputs = lstmprediction)
summary(lstmmodel)
## Model: "model_4"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## input_3 (InputLayer) [(None, 600)] 0
## ________________________________________________________________________________
## embedding (Embedding) (None, 600, 64) 384064
## ________________________________________________________________________________
## lstm (LSTM) (None, 128) 98816
## ________________________________________________________________________________
## dropout (Dropout) (None, 128) 0
## ________________________________________________________________________________
## FC1 (Dense) (None, 128) 16512
## ________________________________________________________________________________
## dropout_1 (Dropout) (None, 128) 0
## ________________________________________________________________________________
## FC2 (Dense) (None, 64) 8256
## ________________________________________________________________________________
## soft (Dense) (None, 10) 650
## ================================================================================
## Total params: 508,298
## Trainable params: 508,298
## Non-trainable params: 0
## ________________________________________________________________________________
## compile model
lstmmodel%>%compile(loss="categorical_crossentropy",
optimizer=optimizer_adam(),
metrics="accuracy")
## Optional early-stopping condition for training:
# call_es <- callback_early_stopping(monitor = "val_loss",patience=1,
# baseline = 0.000001)
# lstm_history <- lstmmodel%>%fit(x=train_tok,y=train_lab,batch_size = 64,
# epochs = 10,
# validation_data = list(val_tok,val_lab),
# callbacks = call_es)
# set.seed(123)
######## The next call can take a long time to run
lstm_history <- lstmmodel%>%fit(x=train_tok,y=train_lab,batch_size = 128,
epochs = 10,
validation_data = list(val_tok,val_lab))
## Visualize the training process
plot(lstm_history)+theme_bw(base_family = "STKaiti")+
geom_point(aes(shape=data),size=3)+
ggtitle("LSTM新闻分类模型")
## Evaluate the model's accuracy on the test set
lstmmodel %>% evaluate(test_tok,test_lab)
## $loss
## [1] 0.9451882
##
## $accuracy
## [1] 0.7806
test_pre <- lstmmodel %>% predict(test_tok)
test_preclass <- as.factor(apply(test_pre,1,which.max)-1)
confusionMatrix(test_preclass,test_lab1)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1 2 3 4 5 6 7 8 9
## 0 984 129 145 13 1 0 11 0 1 0
## 1 2 780 19 2 0 0 3 0 0 1
## 2 0 6 318 4 1 3 2 0 0 0
## 3 5 2 50 829 3 13 66 0 12 1
## 4 1 1 15 3 568 4 5 0 0 1
## 5 1 0 135 2 1 924 0 0 0 1
## 6 5 8 17 7 5 1 841 1 0 0
## 7 0 1 0 4 0 1 0 990 5 0
## 8 0 0 1 0 10 0 0 0 576 0
## 9 2 73 300 136 411 54 72 9 406 996
##
## Overall Statistics
##
## Accuracy : 0.7806
## 95% CI : (0.7724, 0.7887)
## No Information Rate : 0.1
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7562
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
## Sensitivity 0.9840 0.7800 0.3180 0.8290 0.5680 0.9240
## Specificity 0.9667 0.9970 0.9982 0.9831 0.9967 0.9844
## Pos Pred Value 0.7664 0.9665 0.9521 0.8451 0.9498 0.8684
## Neg Pred Value 0.9982 0.9761 0.9294 0.9810 0.9541 0.9915
## Prevalence 0.1000 0.1000 0.1000 0.1000 0.1000 0.1000
## Detection Rate 0.0984 0.0780 0.0318 0.0829 0.0568 0.0924
## Detection Prevalence 0.1284 0.0807 0.0334 0.0981 0.0598 0.1064
## Balanced Accuracy 0.9753 0.8885 0.6581 0.9061 0.7823 0.9542
## Class: 6 Class: 7 Class: 8 Class: 9
## Sensitivity 0.8410 0.9900 0.5760 0.9960
## Specificity 0.9951 0.9988 0.9988 0.8374
## Pos Pred Value 0.9503 0.9890 0.9813 0.4050
## Neg Pred Value 0.9826 0.9989 0.9550 0.9995
## Prevalence 0.1000 0.1000 0.1000 0.1000
## Detection Rate 0.0841 0.0990 0.0576 0.0996
## Detection Prevalence 0.0885 0.1001 0.0587 0.2459
## Balanced Accuracy 0.9181 0.9944 0.7874 0.9167
## Visualize which predictions are correct with a heat map of the confusion matrix
confum <- confusionMatrix(test_preclass,test_lab1)
confumat <- as.data.frame(confum$table)
confumat[,1:2] <- apply(confumat[,1:2],2,as.integer)
# label = labels(test_lab1)
ggplot(confumat,aes(x=Reference,y = Prediction))+
geom_tile(aes(fill = Freq))+
geom_text(aes(label = Freq))+
scale_x_continuous(breaks = c(0:9),labels = labelname)+
scale_y_continuous(breaks = unique(confumat$Prediction),
trans = "reverse",labels = labelname)+
scale_fill_gradient2(low="darkblue", high="lightgreen",
guide="colorbar")+
ggtitle("LSTM新闻分类")
library(keras)
library(tensorflow)
##
## Attaching package: 'tensorflow'
## The following object is masked from 'package:caret':
##
## train
# Recognize images with VGG16
#####################
## The next line automatically downloads vgg16_weights_tf_dim_ordering_tf_kernels.h5. If the download is slow or fails, you can download the file yourself from https://github.com/fchollet/deep-learning-models/releases/tag/v0.1 (the Program\Other folder also contains a pre-downloaded copy). Put it into ~/.keras/models under your home directory and the code will load it automatically on the next run.
####################
model <- application_vgg16(weights = "imagenet", include_top = TRUE)
## Read the image, resize it to 224 x 224, and preprocess it
######## Running the next line may require the Python pillow library; install it by typing pip install pillow in the Terminal
img <- image_load("data/chap12/tiger.jpg", target_size = c(224,224))
imgx <- image_to_array(img)
imgx <- array_reshape(imgx, c(1, dim(imgx)))
## VGG-style preprocessing: convert RGB to BGR and subtract the ImageNet channel means
imgx <- imagenet_preprocess_input(imgx)
## Predict the class with VGG16
vgg16cla <- model %>% predict(imgx)
imagenet_decode_predictions(vgg16cla, top = 3)[[1]]
## class_name class_description score
## 1 n02129604 tiger 0.8723027110
## 2 n02123159 tiger_cat 0.1276341677
## 3 n02128925 jaguar 0.0000537034
## Load the model without the top classification layers
#####################
## The next line automatically downloads vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5. If the download is slow or fails, you can download the file yourself from https://github.com/fchollet/deep-learning-models/releases/tag/v0.1 (the Program\Other folder also contains a pre-downloaded copy). Put it into ~/.keras/models under your home directory and the code will load it automatically on the next run.
####################
model <- application_vgg16(weights = "imagenet", include_top = FALSE)
img <- image_load("data/chap12/tiger.jpg", target_size = c(224,224))
x <- image_to_array(img)
x <- array_reshape(x, c(1,dim(x)))
x <- imagenet_preprocess_input(x)
features <- model %>% predict(x)
dim(features)
## [1] 1 7 7 512
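## With include_top = FALSE, VGG16 returns its final convolutional feature
## maps: 512 channels of size 7 x 7 for a 224 x 224 input.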
par(mfrow = c(20,20),mai=c(0.005,0.005,0.005,0.005))
## Visualize the first 400 feature maps
for(ii in 1:400){
image(features[1,,,ii],xaxt= "n", yaxt= "n",main = "VGG16 features")
}