cheerio语法类似jQuery
doc
- doc-zh:
安装
npm i cheerio
代码实例
const cheerio = require("cheerio");
// Parser settings used for this example: XML mode on, entity decoding off.
const parseOptions = { xmlMode: true, decodeEntities: false };
const doc = cheerio.load('<h2 class="title">Hello world</h2>', parseOptions);

// Replace the heading text, then add a second class to the same element.
doc("h2.title").text("Hello there!");
doc("h2").addClass("welcome");

// Serialize the modified document as XML.
console.log(doc.xml());
// <h2 class="title welcome">Hello there!</h2>
项目实战
import cheerio from "cheerio";
/**
 * Rewrite external image URLs in a piece of markup so they point at this site.
 *
 * Every <img> whose src does not contain process.env.VUE_APP_BASE_URL is
 * passed to saveImage(); when that resolves to a truthy value, the src
 * attribute is replaced with it.
 *
 * @param {*} html - markup handed to cheerio.load
 * @returns {Promise<string>} the document serialized with doc.xml()
 */
export async function replaceImage(html) {
  const parseOptions = { xmlMode: true, decodeEntities: false };
  const doc = cheerio.load(html, parseOptions);

  // .each() does not wait for promises, so collect the wrapped <img> nodes
  // first and do the asynchronous work in an ordinary loop afterwards.
  const images = [];
  doc("img").each((_index, node) => {
    images.push(doc(node));
  });

  for (const image of images) {
    const src = image.attr("src");
    const isExternal =
      Boolean(src) && !src.includes(process.env.VUE_APP_BASE_URL);
    if (!isExternal) {
      continue;
    }

    // Swap in your own replacement routine here.
    const localSrc = await saveImage(src);
    if (localSrc) {
      image.attr("src", localSrc);
    }
  }

  return doc.xml();
}
/**
 * Collect the src attribute of every <img> found in the given markup.
 *
 * @param {*} html - markup handed to cheerio.load
 * @returns {Array} the truthy src values, in the order .each() visits them
 */
export function extractImages(html) {
  const parseOptions = { xmlMode: true, decodeEntities: false };
  const doc = cheerio.load(html, parseOptions);

  const sources = [];
  doc("img").each((_index, node) => {
    const src = doc(node).attr("src");
    // Images with a missing or empty src are skipped.
    if (!src) {
      return;
    }
    sources.push(src);
  });

  return sources;
}
/**
 * Strip the style attribute from every element that carries one.
 *
 * @param {*} html - markup handed to cheerio.load
 * @returns {string} the document serialized with doc.xml()
 */
export function removeStyle(html) {
  const parseOptions = { xmlMode: true, decodeEntities: false };
  const doc = cheerio.load(html, parseOptions);

  // Select every element with a style attribute, then drop that attribute.
  const styledElements = doc("*[style]");
  styledElements.removeAttr("style");

  return doc.xml();
}
xml和html
const cheerio = require("cheerio");
const doc = cheerio.load("<a></a>");

// XML serialization: the empty <a> element is written as a self-closing tag.
const xmlOutput = doc.xml();
console.log(xmlOutput);
// <html><head/><body><a/></body></html>

// HTML serialization: the <a> element keeps its separate closing tag.
const htmlOutput = doc.html();
console.log(htmlOutput);
// <html><head></head><body><a></a></body></html>
如果只需要处理 HTML 片段,可以在拿到 html() 的输出后,自行去掉 cheerio 自动补上的 <html>、<head>、<body> 外层标签:
const cheerio = require("cheerio");
/**
 * Parse markup with cheerio using its default options.
 *
 * @param {*} html - markup handed to cheerio.load
 * @returns {*} whatever cheerio.load returns for that markup
 */
function getDom(html) {
  const doc = cheerio.load(html);
  return doc;
}
/**
 * Serialize a document with doc.html() and strip the
 * <html><head></head><body>…</body></html> wrapper around the content.
 *
 * @param {*} doc - object exposing html(), e.g. the result of getDom()
 * @returns {string} the markup between <body> and </body>, or the full
 *   unmodified html() output when the wrapper is not found
 */
function toHtml(doc) {
  const html = doc.html();
  const wrapper = /<html><head><\/head><body>([\s\S]*)<\/body><\/html>/;
  const match = wrapper.exec(html);

  // The pattern only matches an empty <head> and an attribute-free <body>.
  // Anything else is returned as-is rather than throwing on a null match.
  return match === null ? html : match[1];
}
// Round-trip a bare fragment: parse it, then print it without the wrapper tags.
const fragment = toHtml(getDom("<a></a>"));
console.log(fragment);
// <a></a>