From 703fbcffe7e614843f821d53894ff68518f88335 Mon Sep 17 00:00:00 2001 From: nguyentrungthat <80239428+nguentrungthat@users.noreply.github.com> Date: Wed, 22 Oct 2025 11:01:11 +0700 Subject: [PATCH] Update index.js --- index.js | 141 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/index.js b/index.js index 2bddce5..1464dda 100644 --- a/index.js +++ b/index.js @@ -3,11 +3,11 @@ import puppeteer from "puppeteer"; import axios from "axios"; import * as cheerio from "cheerio"; import nodemailer from "nodemailer"; -import path from "path"; import dayjs from "dayjs"; import mysql from "mysql2/promise"; -const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"]; +// const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"]; +const EMAILS = ["andrew.ng@apactech.io"]; const LIST_STORE = [ { @@ -28,7 +28,12 @@ const LIST_STORE = [ }, ]; -async function scrapeWithPuppeteer(url) { +// Define function promise waiting for a given time +async function wait(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +async function scrapeWithPuppeteer(url, name) { try { const browser = await puppeteer.launch({ headless: true, @@ -37,13 +42,36 @@ async function scrapeWithPuppeteer(url) { const page = await browser.newPage(); await page.goto(url, { waitUntil: "networkidle2" }); + let retries = 0; + while (retries < 10) { + try { + console.log(`${name} retry ${retries + 1}`); + const html = await page.content(); + + // Detect Cloudflare or other blocking messages + if (html.includes("Checking your browser")) { + await wait(5000); + retries++; + continue; + } + + const hasItems = await page.$("li.s-card--horizontal"); + if (hasItems) break; + + await wait(2000); + retries++; + } catch (err) { + await wait(2000); + retries++; + } + } + // Wait for cards or detect Cloudflare const html = await page.content(); const needBrowserCheck = html.includes("Checking your browser"); if (needBrowserCheck) { await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null); } - const items = await page.$$eval("li.s-card--horizontal", (nodes) => { const results = []; nodes.forEach((node) => { @@ -77,14 +105,14 @@ async function scrapeWithPuppeteer(url) { const priceEl = node.querySelector(".s-card__price"); if (priceEl) { let txt = priceEl.textContent.replace(",", "").trim(); - txt = txt.replace("£", "GBP ").replace("$", "USD "); + txt = txt.replace("£", "GBP").replace("$", "USD"); const match = txt.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/); if (match) { - payload.currency = match[1] || ""; + payload.currency = match[1] || match[3] || ""; payload.price = match[2] || ""; + if (!payload.currency?.trim()) payload.currency = "USD"; } } - if (payload.id) results.push(payload); }); return results; @@ -131,38 +159,67 @@ async function scrapeWithCheerio(url) { const currency = match[1] || match[3] || ""; payload.price = match[2]; payload.currency = currency.replace("£", "GBP").replace("$", "USD"); - if(!payload.currency?.trim()) payload.currency = "USD"; + if (!payload.currency?.trim()) payload.currency = "USD"; } - if (payload.id) items.push(payload); }); return items; } -async function sendMail(subject, body, attachmentPath = null) { - const transporter = nodemailer.createTransport({ - host: process.env.MAIL_HOST, - port: process.env.MAIL_PORT, - secure: true, - auth: { user: process.env.MAIL_USERNAME, pass: process.env.MAIL_PASSWORD }, - }); +async function sendMail(subject, body, isError = false) { + try { + await axios({ + url: process.env.API_DISTI_HOST + "/api/export/items-sold-out", + method: "GET", + responseType: "arraybuffer", + timeout: 60000, + headers: { + "x-key": "CanTho#1", + }, + }).then(async (response) => { + // console.log(response); + const now = new Date(); + const year = now.getFullYear(); + const month = String(now.getMonth() + 1).padStart(2, "0"); + const day = String(now.getDate()).padStart(2, "0"); + const fileName = `items_sold_out_${year}_${month}_${day}.xlsx`; - const mailOptions = { - from: process.env.MAIL_USERNAME, - to: EMAILS.join(","), - subject, - text: body, - }; + const transporter = nodemailer.createTransport({ + host: process.env.MAIL_HOST, + port: process.env.MAIL_PORT, + secure: true, + auth: { + user: process.env.MAIL_USERNAME, + pass: process.env.MAIL_PASSWORD, + }, + connectionTimeout: 10000, // prevent timeout + pool: true, // reuse connection if many mails + }); - if (attachmentPath) { - mailOptions.attachments = [{ filename: path.basename(attachmentPath), path: attachmentPath }]; + const mailOptions = { + from: process.env.MAIL_USERNAME, + to: EMAILS.join(","), + subject, + text: body, + attachments: isError + ? [] + : [ + { + filename: fileName, + content: response.data, // attach from memory + }, + ], + }; + + await transporter.sendMail(mailOptions); + console.log("✅ Email sent successfully with Excel attachment!"); + }); + } catch (err) { + console.error("❌ Failed to send email:", err.message); } - - await transporter.sendMail(mailOptions); } async function main() { - // 1️⃣ Connect to MySQL const db = await mysql.createConnection({ host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP user: process.env.MYSQL_USER, @@ -178,8 +235,7 @@ async function main() { for (const store of LIST_STORE) { console.log(`Processing ${store.name}`); let items = await scrapeWithCheerio(store.url); - if (!items) items = await scrapeWithPuppeteer(store.url); - + if (!items) items = await scrapeWithPuppeteer(store.url, store.name); let count = 0; for (const item of items) { // 2️⃣ Check if record exists @@ -188,18 +244,19 @@ async function main() { // 3️⃣ Insert new record const priceText = item.price || ""; + const title = (item.name || "").replace("Opens in a new window or tab", "").trim(); const amount = parseFloat(priceText.replace(/[^\d.]/g, "").replace(/,/g, "")); await db.execute( `INSERT INTO items_sold_out (id, name, \`condition\`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [ - item.id, - item.name, - item.condition, + item.id || "", + title, + item.condition || "", amount || 0, - item.currency, - item.link_detail, - store.name, - item.sold_out_date, + item.currency || "", + item.link_detail || "", + store.name || "", + item.sold_out_date || "", dayjs().format("YYYY-MM-DD HH:mm:ss"), dayjs().format("YYYY-MM-DD HH:mm:ss"), ] @@ -211,20 +268,22 @@ async function main() { inserted.push({ name: store.name, count }); } - // 4️⃣ Send email report if (errors.length > 3) { const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n"); - await sendMail("[New Items] - Scraping Sold Out Error Report", msg); + await sendMail("[New Items] - Scraping Sold Out Error Report", msg, true); } else { - const msg = inserted.map((i) => `Shop: ${i.name}\nInserted: ${i.count}`).join("\n\n"); + const msg = inserted.map((i) => `Shop: ${i.name}\nSold: ${i.count} items`).join("\n\n"); await sendMail("[New Items] - Scraping Sold Out Success", msg); } console.log("✅ Done scraping."); - // 5️⃣ Close MySQL connection await db.end(); console.log("🔌 MySQL connection closed"); + process.exit(0); } -main().catch((err) => console.error(err)); +main().catch((err) => { + console.error(err); + process.exit(1); +});