本文实现了一个基于 Express 框架的简单爬虫应用,用于从指定的网页中抓取数据,整理后通过 HTTP 接口返回。以下是对代码的详细解析:
界面实现
后端
模块导入
const express = require('express');
const router = express.Router();
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
- `express`:用于创建 web 服务器和路由。
- `request`:用于发送 HTTP 请求,获取网页内容。
- `cheerio`:用于解析 HTML 文档,语法类似于 jQuery。
- `fs`:用于文件系统操作(虽然代码中未使用)。
常量定义
// Page to scrape.
const url = 'https://www.maigoo.com/top/427945.html';
// Response payload filled in by ZhengHeData(); a single object shared by all requests.
const objWithUrl = {};
// Module-level buffers that the extractor functions push scraped rows into.
const ScoreData = [];
const TuiJianDataMessage = [];
const JingDianMessage = [];
- `url`:目标网页的 URL。
- `objWithUrl`:用于存储最终整合后的数据。
- `ScoreData`、`TuiJianDataMessage`、`JingDianMessage`:分别用于存储从网页中提取的不同类型的数据。
数据提取函数
/**
 * Scrape the ranking list of the loaded page into the module-level
 * `ScoreData` array.
 *
 * The array is emptied first so repeated calls do not pile up rows from
 * earlier scrapes. The previous `this.ScoreData = []` never touched the
 * module-level array: in a plain call `this` is the global object in sloppy
 * mode and `undefined` in strict mode (where the assignment throws).
 *
 * @param {Function} $ - cheerio instance returned by `cheerio.load(html)`.
 * @returns {void}
 */
function ScoreFunction($) {
  ScoreData.length = 0;
  $('.md_citiao >.rowlist').find('.pbox').each(function (index, value) {
    const box = $(value);
    ScoreData.push({
      id: index + 1,
      paiming: box.find('.pitem>.item>.num').text(),
      detailPage: box.find('.pitem>.item>.md_title>.font18').attr('href'),
      title: box.find('.pitem>.item>.md_title>a>i').text(),
      // One entry per matched tag element.
      tese_em: box.find('.pitem>.item>.md_title>.tese_em').map(function (i, el) {
        return $(el).text();
      }).get(),
      score: box.find('.pitem>.item>.attention>.att').text(),
    });
  });
}
`ScoreFunction`:从网页中提取评分相关的数据,并将其存储在 `ScoreData` 数组中。
/**
 * Scrape the recommendation links of the loaded page into the module-level
 * `TuiJianDataMessage` array.
 *
 * The array is emptied first so repeated calls do not accumulate entries.
 * The previous `this.TuiJianData = []` did not reset it (wrong name, and
 * `this` is not the module scope).
 *
 * @param {Function} $ - cheerio instance returned by `cheerio.load(html)`.
 * @returns {void}
 */
function TuiJianData($) {
  TuiJianDataMessage.length = 0;
  $('.md_citiao >.citiaobtnlist').find('.dhidden').each(function (index, value) {
    const link = $(value).find('a');
    TuiJianDataMessage.push({
      id: index + 1,
      name: link.text(),
      url: link.attr('href'),
    });
  });
}
`TuiJianData`:从网页中提取推荐数据,并将其存储在 `TuiJianDataMessage` 数组中。
/**
 * Scrape the scenic-spot detail cards of the loaded page into the
 * module-level `JingDianMessage` array.
 *
 * The array is emptied first so repeated calls do not accumulate entries.
 * The previous `this.JingDianMessage = []` never reset the module-level
 * array because `this` is not the module scope in a plain function call.
 *
 * @param {Function} $ - cheerio instance returned by `cheerio.load(html)`.
 * @returns {void}
 */
function JingDianData($) {
  JingDianMessage.length = 0;
  $('.mod_cont>.md_citiao').find('.pbox').each(function (index, value) {
    const box = $(value);
    const titleLink = box.find('.md_title>.title>a');
    JingDianMessage.push({
      id: index + 1,
      title: titleLink.text(),
      jingdianUrl: titleLink.attr('href'),
      tese_em: box.find('.md_title>.sc_tese').map(function (i, el) {
        return $(el).text();
      }).get(),
      pic: box.find('.pic>.img>a>img').attr('src'),
      // Placeholder text is sent when the card has no gallery link.
      looktukuUrl: box.find('.pic>.img>.looktuku').attr('href') || '暂无图集',
      desc: box.find('.content').text(),
      descUrl: box.find('.content>a').attr('href'),
      address: box.find('.address').text(),
    });
  });
}
`JingDianData`:从网页中提取景点相关的数据,并将其存储在 `JingDianMessage` 数组中。
数据整合函数
/**
 * Move the scraped rows out of the module-level buffers into `objWithUrl`
 * and return it.
 *
 * Each buffer is emptied completely and only its first `limit` rows are
 * kept. The previous `splice(0, 10)` left rows 11..N behind, so the next
 * call started by returning leftovers of the previous scrape.
 *
 * @param {number} [limit=10] - Maximum number of rows kept per category.
 * @returns {{ScoreData: Array, TuiJianDataMessage: Array, JingDianMessage: Array}}
 *   The shared `objWithUrl` object.
 */
function ZhengHeData(limit = 10) {
  // Drain the whole buffer, then keep only the leading rows.
  const drain = (buffer) => buffer.splice(0, buffer.length).slice(0, limit);
  objWithUrl.ScoreData = drain(ScoreData);
  objWithUrl.TuiJianDataMessage = drain(TuiJianDataMessage);
  objWithUrl.JingDianMessage = drain(JingDianMessage);
  return objWithUrl;
}
`ZhengHeData`:将提取的数据整合到一个对象中(每类数据最多取前 10 条),并返回该对象。
路由处理
/**
 * GET /data — fetch the target page, scrape it and answer with the
 * aggregated result.
 *
 * Responds with `{ code: 1, msg, data }` on success and
 * `{ code: 0, msg, data: null }` when the page cannot be fetched or parsed.
 */
router.get('/data', (req, res) => {
  const sendFailure = () => {
    res.send({
      code: 0,
      msg: '数据获取失败!',
      data: null
    });
  };

  request(url, (error, response, body) => {
    if (error || !response || response.statusCode !== 200) {
      sendFailure();
      return;
    }

    let data;
    try {
      const $ = cheerio.load(body);
      ScoreFunction($);
      TuiJianData($);
      JingDianData($);
      data = ZhengHeData();
    } catch (err) {
      // An exception thrown inside the request callback would otherwise be
      // uncaught and leave the client without any response.
      console.error('页面解析失败:', err);
      sendFailure();
      return;
    }

    res.send({
      code: 1,
      msg: '数据获取成功!',
      data
    });
  });
});
`/data` 路由:当访问 `/data` 时,发送 HTTP 请求获取网页内容,解析并提取数据,最后将整合后的数据返回给客户端。
模块导出
// Expose the router so other modules can require() it.
module.exports = router;
- 将路由模块导出,以便在其他文件中使用。
完整代码
const express = require('express');
const router = express.Router();
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs')
// Page to scrape.
const url = 'https://www.maigoo.com/top/427945.html';
// Response payload filled in by ZhengHeData(); a single object shared by all requests.
const objWithUrl = {}
// Module-level buffers that the extractor functions push scraped rows into.
const ScoreData = []
const TuiJianDataMessage = []
const JingDianMessage = []
/**
 * Scrape the ranking list of the loaded page into the module-level
 * `ScoreData` array.
 *
 * The array is emptied first so repeated calls do not pile up rows from
 * earlier scrapes. The previous `this.ScoreData = []` never touched the
 * module-level array: in a plain call `this` is the global object in sloppy
 * mode and `undefined` in strict mode (where the assignment throws).
 *
 * @param {Function} $ - cheerio instance returned by `cheerio.load(html)`.
 * @returns {void}
 */
function ScoreFunction($) {
  ScoreData.length = 0;
  $('.md_citiao >.rowlist').find('.pbox').each(function (index, value) {
    const box = $(value);
    ScoreData.push({
      id: index + 1,
      paiming: box.find('.pitem>.item>.num').text(),
      detailPage: box.find('.pitem>.item>.md_title>.font18').attr('href'),
      title: box.find('.pitem>.item>.md_title>a>i').text(),
      // One entry per matched tag element.
      tese_em: box.find('.pitem>.item>.md_title>.tese_em').map(function (i, el) {
        return $(el).text();
      }).get(),
      score: box.find('.pitem>.item>.attention>.att').text(),
    });
  });
}
/**
 * Scrape the recommendation links of the loaded page into the module-level
 * `TuiJianDataMessage` array.
 *
 * The array is emptied first so repeated calls do not accumulate entries.
 * The previous `this.TuiJianData = []` did not reset it (wrong name, and
 * `this` is not the module scope).
 *
 * @param {Function} $ - cheerio instance returned by `cheerio.load(html)`.
 * @returns {void}
 */
function TuiJianData($) {
  TuiJianDataMessage.length = 0;
  $('.md_citiao >.citiaobtnlist').find('.dhidden').each(function (index, value) {
    const link = $(value).find('a');
    TuiJianDataMessage.push({
      id: index + 1,
      name: link.text(),
      url: link.attr('href'),
    });
  });
}
/**
 * Scrape the scenic-spot detail cards of the loaded page into the
 * module-level `JingDianMessage` array.
 *
 * The array is emptied first so repeated calls do not accumulate entries.
 * The previous `this.JingDianMessage = []` never reset the module-level
 * array because `this` is not the module scope in a plain function call.
 *
 * @param {Function} $ - cheerio instance returned by `cheerio.load(html)`.
 * @returns {void}
 */
function JingDianData($) {
  JingDianMessage.length = 0;
  $('.mod_cont>.md_citiao').find('.pbox').each(function (index, value) {
    const box = $(value);
    const titleLink = box.find('.md_title>.title>a');
    JingDianMessage.push({
      id: index + 1,
      title: titleLink.text(),
      jingdianUrl: titleLink.attr('href'),
      tese_em: box.find('.md_title>.sc_tese').map(function (i, el) {
        return $(el).text();
      }).get(),
      pic: box.find('.pic>.img>a>img').attr('src'),
      // Placeholder text is sent when the card has no gallery link.
      looktukuUrl: box.find('.pic>.img>.looktuku').attr('href') || '暂无图集',
      desc: box.find('.content').text(),
      descUrl: box.find('.content>a').attr('href'),
      address: box.find('.address').text(),
    });
  });
}
/**
 * Move the scraped rows out of the module-level buffers into `objWithUrl`
 * and return it.
 *
 * Each buffer is emptied completely and only its first `limit` rows are
 * kept. The previous `splice(0, 10)` left rows 11..N behind, so the next
 * call started by returning leftovers of the previous scrape.
 *
 * @param {number} [limit=10] - Maximum number of rows kept per category.
 * @returns {{ScoreData: Array, TuiJianDataMessage: Array, JingDianMessage: Array}}
 *   The shared `objWithUrl` object.
 */
function ZhengHeData(limit = 10) {
  // Drain the whole buffer, then keep only the leading rows.
  const drain = (buffer) => buffer.splice(0, buffer.length).slice(0, limit);
  objWithUrl.ScoreData = drain(ScoreData);
  objWithUrl.TuiJianDataMessage = drain(TuiJianDataMessage);
  objWithUrl.JingDianMessage = drain(JingDianMessage);
  return objWithUrl;
}
/**
 * GET /data — fetch the target page, scrape it and answer with the
 * aggregated result.
 *
 * Responds with `{ code: 1, msg, data }` on success and
 * `{ code: 0, msg, data: null }` when the page cannot be fetched or parsed.
 */
router.get('/data', (req, res) => {
  const sendFailure = () => {
    res.send({
      code: 0,
      msg: '数据获取失败!',
      data: null
    });
  };

  request(url, (error, response, body) => {
    if (error || !response || response.statusCode !== 200) {
      sendFailure();
      return;
    }

    let data;
    try {
      const $ = cheerio.load(body);
      ScoreFunction($);
      TuiJianData($);
      JingDianData($);
      data = ZhengHeData();
    } catch (err) {
      // An exception thrown inside the request callback would otherwise be
      // uncaught and leave the client without any response.
      console.error('页面解析失败:', err);
      sendFailure();
      return;
    }

    res.send({
      code: 1,
      msg: '数据获取成功!',
      data
    });
  });
});

// Expose the router so other modules can require() it.
module.exports = router;
前端
数据获取与初始化
在 `created` 钩子中,通过 `shuJuList` API 获取数据,并将数据分别赋值给 `ScoreData`、`TuiJianDataMessage` 和 `JingDianMessage`。如果数据获取失败,捕获错误并输出到控制台。
async created() {
try {
const res = await shuJuList();
this.ScoreData = res.data.data.ScoreData;
this.TuiJianDataMessage = res.data.data.TuiJianDataMessage;
this.JingDianMessage = res.data.data.JingDianMessage;
} catch (error) {
console.error("数据获取失败:", error);
}
}
ECharts 初始化
在 `mounted` 钩子中初始化两个 ECharts 图表(景区售票占比、人数占比)。
mounted() {
this.initEcharts1();
this.initEcharts2();
}
ECharts 配置与渲染
`initEcharts` 方法用于初始化两个饼图,`initPieChart` 方法用于配置和渲染具体的饼图。通过传入容器 ID、标题、数据和颜色数组,动态生成饼图。
methods: {
initEcharts() {
this.initPieChart('echarts-container1', '景区售票占比', this.getScenicData(), ['#ff7f50', '#87cefa', '#da70d6', '#32cd32']);
this.initPieChart('echarts-container2', '人数占比', this.getVisitorData(), ['#ff69b4', '#add8e6', '#90ee90', '#ffa07a']);
},
initPieChart(containerId, titleText, data, colors) {
var myChart = echarts.init(document.getElementById(containerId));
var option = {
title: {
text: titleText,
left: 'center',
},
tooltip: {
trigger: 'item',
formatter: '{a} <br/>{b}: {c} ({d}%)',
},
legend: {
orient: 'vertical',
left: 10,
data: data.map(item => item.name),
},
series: [
{
name: '数据',
type: 'pie',
radius: '55%',
center: ['50%', '60%'],
data: data,
roseType: 'area',
itemStyle: {
color: function (params) {
return colors[params.dataIndex];
},
},
emphasis: {
itemStyle: {
shadowBlur: 10,
shadowOffsetX: 0,
shadowColor: 'rgba(0, 0, 0, 0.5)',
},
},
},
],
};
myChart.setOption(option);
}
}
数据获取方法
`getScenicData` 和 `getVisitorData` 方法用于获取景区售票和人数占比的数据。这些数据可以是从 API 获取的,也可以是本地模拟的(本文使用本地模拟数据)。
methods: {
getScenicData() {
return [
{ name: '景区A', value: 40 },
{ name: '景区B', value: 30 },
{ name: '景区C', value: 20 },
{ name: '景区D', value: 10 },
];
},
getVisitorData() {
return [
{ name: '游客A', value: 50 },
{ name: '游客B', value: 30 },
{ name: '游客C', value: 15 },
{ name: '游客D', value: 5 },
];
}
}
样式与布局
在模板中,使用 `el-row` 和 `el-col` 进行布局,左侧展示排名和推荐景点,右侧展示详细信息与 ECharts 图表。通过 `el-carousel` 实现轮播效果,展示景点的详细信息。
<template>
  <div class="dashboard-container">
    <el-row :gutter="20">
      <!-- Left column: overall ranking and recommended links -->
      <el-col :span="8">
        <div class="card ranking-list">
          <h3 class="card-title">景点综合排名</h3>
          <div v-for="item in ScoreData" :key="item.id" class="ranking-item">
            <span class="rank-number">{{ item.paiming }}</span>
            <div class="content">
              <h4>
                <a :href="item.detailPage" target="_blank" rel="noopener noreferrer">{{ item.title }}</a>
              </h4>
              <div class="tags">
                <span v-for="(tag, index) in item.tese_em" :key="index" class="tag">{{ tag }}</span>
              </div>
              <div class="score">{{ item.score }}</div>
            </div>
          </div>
        </div>
        <div class="card recommendation">
          <h3 class="card-title">推荐景点</h3>
          <div class="tags-group">
            <a v-for="item in TuiJianDataMessage" :key="item.id" :href="item.url" target="_blank" rel="noopener noreferrer" class="recommend-tag">{{ item.name }}</a>
          </div>
        </div>
      </el-col>
      <!-- Right column: scenic-spot carousel and charts -->
      <el-col :span="16">
        <!-- loop/autoplay are bound with ":" so the boolean false is passed;
             the plain attribute form passes the truthy string "false". -->
        <el-carousel :loop="false" :autoplay="false" motion-blur>
          <!-- NOTE(review): `height` is documented as an el-carousel prop, not an
               el-carousel-item prop; confirm where 530px is meant to apply. -->
          <el-carousel-item v-for="item in JingDianMessage" class="card detail-card" height="530px" :key="item.id">
            <h3 class="card-title">{{ item.title }}</h3>
            <div class="detail-content">
              <img :src="item.pic" :alt="item.title" class="detail-image" />
              <div class="detail-info">
                <!-- Skip the tag row when nothing was scraped; tese_em[0] would be undefined. -->
                <div v-if="item.tese_em && item.tese_em.length" class="tags">
                  <span v-for="(tag, index) in item.tese_em[0].split('\t')" :key="index" class="tag">{{ tag.trim() }}</span>
                </div>
                <p class="desc">{{ item.desc.replace(/\n|\t/g, "") }}</p>
                <div class="address">
                  <i class="el-icon-location"></i>
                  {{ item.address }}
                </div>
                <!-- The backend sends the placeholder text '暂无图集' when there is no gallery; do not render it as a link. -->
                <a v-if="item.looktukuUrl && item.looktukuUrl !== '暂无图集'" :href="item.looktukuUrl" target="_blank" rel="noopener noreferrer" class="image-link">查看图集</a>
                <a :href="item.jingdianUrl" target="_blank" rel="noopener noreferrer" class="image-link">景点详情</a>
              </div>
            </div>
          </el-carousel-item>
        </el-carousel>
        <div class="card dashboard-echarts">
          <div class="echarts" id="echarts-container1"></div>
          <div class="echarts" id="echarts-container2"></div>
        </div>
      </el-col>
    </el-row>
  </div>
</template>
样式优化
通过 CSS 对卡片、排名列表、推荐标签、详细信息等元素进行样式优化,确保页面布局美观且易于阅读。
/* Page wrapper */
.dashboard-container {
padding: 20px;
}
/* Shared white panel used by every section */
.card {
background: #fff;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
box-shadow: 0 2px 12px 0 rgba(0, 0, 0, 0.1);
}
/* One row of the ranking list: rank number followed by the content */
.ranking-item {
display: flex;
align-items: center;
margin-bottom: 10px;
}
.rank-number {
font-size: 18px;
font-weight: bold;
margin-right: 10px;
}
/* Wrapping row of small feature tags */
.tags {
display: flex;
flex-wrap: wrap;
}
.tag {
background: #f0f0f0;
padding: 2px 8px;
border-radius: 4px;
margin-right: 5px;
margin-bottom: 5px;
}
/* Recommended-spot link rendered as a pill */
.recommend-tag {
display: inline-block;
padding: 5px 10px;
background: #f0f0f0;
border-radius: 4px;
margin-right: 10px;
margin-bottom: 10px;
}
/* Scenic-spot picture inside the detail card */
.detail-image {
width: 100%;
height: auto;
border-radius: 8px;
}
.detail-info {
margin-top: 10px;
}
/* "Gallery" / "details" links under the description */
.image-link {
display: inline-block;
margin-right: 10px;
color: #409eff;
text-decoration: none;
}
/* ECharts host element; the chart is sized from this box */
.echarts {
width: 100%;
height: 300px;
}
完整代码
<template>
  <div class="dashboard-container">
    <el-row :gutter="20">
      <!-- Left column: overall ranking and recommended links -->
      <el-col :span="8">
        <div class="card ranking-list">
          <h3 class="card-title">景点综合排名</h3>
          <div v-for="item in ScoreData" :key="item.id" class="ranking-item">
            <span class="rank-number">{{ item.paiming }}</span>
            <div class="content">
              <h4>
                <a
                  :href="item.detailPage"
                  target="_blank"
                  rel="noopener noreferrer"
                  >{{ item.title }}</a
                >
              </h4>
              <div class="tags">
                <span
                  v-for="(tag, index) in item.tese_em"
                  :key="index"
                  class="tag"
                  >{{ tag }}</span
                >
              </div>
              <div class="score">{{ item.score }}</div>
            </div>
          </div>
        </div>
        <div class="card recommendation">
          <h3 class="card-title">推荐景点</h3>
          <div class="tags-group">
            <a
              v-for="item in TuiJianDataMessage"
              :key="item.id"
              :href="item.url"
              target="_blank"
              rel="noopener noreferrer"
              class="recommend-tag"
              >{{ item.name }}</a
            >
          </div>
        </div>
      </el-col>
      <!-- Right column: scenic-spot carousel and charts -->
      <el-col :span="16">
        <!-- loop/autoplay are bound with ":" so the boolean false is passed;
             the plain attribute form passes the truthy string "false". -->
        <el-carousel :loop="false" :autoplay="false" motion-blur>
          <!-- NOTE(review): `height` is documented as an el-carousel prop, not an
               el-carousel-item prop; confirm where 530px is meant to apply. -->
          <el-carousel-item
            v-for="item in JingDianMessage"
            class="card detail-card"
            height="530px"
            :key="item.id"
          >
            <h3 class="card-title">{{ item.title }}</h3>
            <div class="detail-content">
              <img :src="item.pic" :alt="item.title" class="detail-image" />
              <div class="detail-info">
                <!-- Skip the tag row when nothing was scraped; tese_em[0] would be undefined. -->
                <div v-if="item.tese_em && item.tese_em.length" class="tags">
                  <span
                    v-for="(tag, index) in item.tese_em[0].split('\t')"
                    :key="index"
                    class="tag"
                    >{{ tag.trim() }}</span
                  >
                </div>
                <p class="desc">{{ item.desc.replace(/\n|\t/g, "") }}</p>
                <div class="address">
                  <i class="el-icon-location"></i>
                  {{ item.address }}
                </div>
                <!-- The backend sends the placeholder text '暂无图集' when there is no gallery; do not render it as a link. -->
                <a
                  v-if="item.looktukuUrl && item.looktukuUrl !== '暂无图集'"
                  :href="item.looktukuUrl"
                  target="_blank"
                  rel="noopener noreferrer"
                  class="image-link"
                  >查看图集</a
                >
                <a
                  :href="item.jingdianUrl"
                  target="_blank"
                  rel="noopener noreferrer"
                  class="image-link"
                  >景点详情</a
                >
              </div>
            </div>
          </el-carousel-item>
        </el-carousel>
        <div class="card dashboard-echarts">
          <div class="echarts" id="echarts-container1"></div>
          <div class="echarts" id="echarts-container2"></div>
        </div>
      </el-col>
    </el-row>
  </div>
</template>
<script>
import * as echarts from "echarts";
import { shuJuList } from "../../api/shuju.js";
/**
 * Dashboard component: shows the scraped ranking, recommendation links and
 * scenic-spot details, plus two pie charts fed with local mock data.
 */
export default {
  data() {
    return {
      ScoreData: [],
      TuiJianDataMessage: [],
      JingDianMessage: [],
      echartsOneXdata: [],
    };
  },
  /** Load the scraped data and copy the three lists into component state. */
  async created() {
    try {
      const res = await shuJuList();
      const body = res && res.data;
      // Report a missing payload explicitly instead of relying on a
      // TypeError from reading a property of null.
      if (!body || !body.data) {
        console.error("数据获取失败:", body && body.msg);
        return;
      }
      // Fall back to empty lists so the template's v-for never sees undefined.
      this.ScoreData = body.data.ScoreData || [];
      this.TuiJianDataMessage = body.data.TuiJianDataMessage || [];
      this.JingDianMessage = body.data.JingDianMessage || [];
    } catch (error) {
      console.error("数据获取失败:", error);
    }
  },
  // The object used to declare `mounted` twice. The later declaration (kept
  // here) silently replaced the earlier one, which called the undefined
  // methods initEcharts1/initEcharts2.
  mounted() {
    this.initEcharts();
  },
  methods: {
    /** Draw both dashboard pie charts with their data sets and palettes. */
    initEcharts() {
      this.initPieChart('echarts-container1', '景区售票占比', this.getScenicData(), ['#ff7f50', '#87cefa', '#da70d6', '#32cd32']);
      this.initPieChart('echarts-container2', '人数占比', this.getVisitorData(), ['#ff69b4', '#add8e6', '#90ee90', '#ffa07a']);
    },
    /**
     * Render one rose-type pie chart.
     *
     * @param {string} containerId - id of the DOM element hosting the chart.
     * @param {string} titleText - Chart title.
     * @param {Array<{name: string, value: number}>} data - Pie slices.
     * @param {string[]} colors - Slice colours; reused cyclically when there
     *   are more slices than colours.
     */
    initPieChart(containerId, titleText, data, colors) {
      const container = document.getElementById(containerId);
      if (!container) {
        // echarts.init cannot work without a DOM node; skip instead of throwing.
        console.warn(`ECharts container #${containerId} not found`);
        return;
      }
      // Create the chart instance on the prepared DOM node.
      const myChart = echarts.init(container);
      // Chart configuration and data.
      const option = {
        title: {
          text: titleText,
          left: 'center',
        },
        tooltip: {
          trigger: 'item',
          formatter: '{a} <br/>{b}: {c} ({d}%)',
        },
        legend: {
          orient: 'vertical',
          left: 10,
          data: data.map(item => item.name),
        },
        series: [
          {
            name: '数据',
            type: 'pie',
            radius: '55%',
            center: ['50%', '60%'],
            data: data,
            roseType: 'area', // render as a rose chart
            itemStyle: {
              // Pick from the supplied palette, wrapping around if needed.
              color: function (params) {
                return colors[params.dataIndex % colors.length];
              },
            },
            emphasis: {
              itemStyle: {
                shadowBlur: 10,
                shadowOffsetX: 0,
                shadowColor: 'rgba(0, 0, 0, 0.5)',
              },
            },
          },
        ],
      };
      // Apply the configuration to display the chart.
      myChart.setOption(option);
    },
    /** Mock ticket-share data for the first pie chart. */
    getScenicData() {
      return [
        { value: 30, name: '西湖' },
        { value: 20, name: '宋城' },
        { value: 25, name: '灵隐寺' },
        { value: 15, name: '断桥残雪' },
      ];
    },
    /** Mock visitor-count data for the second pie chart. */
    getVisitorData() {
      return [
        { value: 20000, name: '西湖' },
        { value: 15000, name: '宋城' },
        { value: 10000, name: '灵隐寺' },
        { value: 5000, name: '断桥残雪' },
      ];
    },
  },
};
</script>
<style lang="scss" scoped>
// Page wrapper; every rule below is nested inside it.
.dashboard-container {
padding: 10px;
background: #f0f2f5;
min-height: 65vh;
.dashboard-title {
text-align: center;
color: #333;
margin-bottom: 30px;
font-size: 28px;
}
// Shared white panel; `&-title` compiles to `.card-title`.
.card {
background: white;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
box-shadow: 0 2px 12px 0 rgba(0, 0, 0, 0.1);
&-title {
color: #333;
margin-bottom: 15px;
font-size: 18px;
border-left: 4px solid #409eff;
padding-left: 10px;
}
}
// Ranking panel: fixed height with its own vertical scrollbar.
.ranking-list {
height: 50vh;
overflow-y: auto;
.ranking-item {
display: flex;
align-items: center;
padding: 15px 0;
border-bottom: 1px solid #eee;
.rank-number {
font-size: 24px;
color: #409eff;
min-width: 50px;
text-align: center;
}
.content {
flex: 1;
h4 {
margin: 0 0 8px;
font-size: 16px;
a {
color: #333;
text-decoration: none;
&:hover {
color: #409eff;
}
}
}
.tags {
margin-bottom: 8px;
.tag {
display: inline-block;
background: #f4f4f5;
color: #909399;
padding: 5px 10px;
border-radius: 4px;
margin-right: 8px;
margin-bottom: 5px;
font-size: 12px;
}
}
.score {
color: #f56c6c;
font-weight: bold;
}
}
}
}
// Recommended links rendered as pills that lift slightly on hover.
.recommendation {
.tags-group {
display: flex;
flex-wrap: wrap;
gap: 10px;
.recommend-tag {
background: #409eff;
color: white;
padding: 8px 15px;
border-radius: 20px;
text-decoration: none;
transition: all 0.3s;
&:hover {
background: darken(#409eff, 10%);
transform: translateY(-2px);
}
}
}
}
// Carousel slide: image on the left, text block on the right.
.detail-card {
.detail-content {
display: flex;
gap: 20px;
box-sizing: border-box;
.detail-image {
width: 300px;
height: 200px;
border-radius: 6px;
object-fit: cover;
}
.detail-info {
flex: 1;
// Description is clamped to four lines with an ellipsis.
.desc {
color: #666;
line-height: 1.6;
margin: 10px 0;
width: 560px;
display: -webkit-box;
-webkit-line-clamp: 4;
-webkit-box-orient: vertical;
text-overflow: ellipsis;
overflow: hidden;
}
.address {
color: #999;
margin-top: 15px;
i {
margin-right: 5px;
}
}
.image-link {
display: inline-block;
margin-top: 10px;
color: #409eff;
text-decoration: none;
margin-right: 20px;
&:hover {
text-decoration: underline;
}
}
}
}
}
// Chart panel: the two chart hosts sit side by side.
.dashboard-echarts {
margin-top: 20px;
display: flex;
justify-content: space-between;
.echarts {
width: 48%;
height: 400px;
}
}
}
</style>
项目地址
欢迎大家点击查看,方便的话点一个star~