This commit is contained in:
root 2026-03-12 12:48:18 +11:00
parent 3b0f4992a2
commit b4142df483
1 changed file with 6 additions and 5 deletions

View File

@@ -15,7 +15,7 @@ async function safeGetContent(page) {
} catch (err) { } catch (err) {
// Nếu navigation xảy ra → chờ page ổn định lại rồi đọc tiếp // Nếu navigation xảy ra → chờ page ổn định lại rồi đọc tiếp
if (err.message.includes("Execution context was destroyed")) { if (err.message.includes("Execution context was destroyed")) {
await page.waitForNetworkIdle({ idleTime: 1000 }).catch(() => {}); await page.waitForNetworkIdle({ idleTime: 1000 }).catch(() => { });
return await page.content(); return await page.content();
} }
throw err; throw err;
@@ -117,11 +117,12 @@ async function scrapeWithPuppeteer(store) {
nodes.forEach((node) => { nodes.forEach((node) => {
const payload = {}; const payload = {};
// ---------------- LINK + ID ---------------- // ---------------- LINK + ID ----------------
const linkEl = node.querySelector("div.su-media__image a"); const linkEl = node.querySelector("div.su-image a");
// if (!linkEl) return; // if (!linkEl) return;
const linkDetail = linkEl && linkEl?.href ? linkEl?.href : ""
payload.link_detail = linkEl?.href || ""; if (!linkDetail) return;
const idMatch = linkEl.href.match(/\/itm\/(\d+)/); payload.link_detail = linkDetail;
const idMatch = linkDetail.match(/\/itm\/(\d+)/);
if (!idMatch) return; if (!idMatch) return;
payload.id = idMatch[1]; payload.id = idMatch[1];