Update index.js

This commit is contained in:
nguyentrungthat 2025-10-22 11:01:11 +07:00
parent 5237ceaba1
commit 703fbcffe7
1 changed files with 100 additions and 41 deletions

141
index.js
View File

@ -3,11 +3,11 @@ import puppeteer from "puppeteer";
import axios from "axios";
import * as cheerio from "cheerio";
import nodemailer from "nodemailer";
import path from "path";
import dayjs from "dayjs";
import mysql from "mysql2/promise";
const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"];
// const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"];
const EMAILS = ["andrew.ng@apactech.io"];
const LIST_STORE = [
{
@ -28,7 +28,12 @@ const LIST_STORE = [
},
];
async function scrapeWithPuppeteer(url) {
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds before the returned promise settles.
 * @returns {Promise<void>} Promise that resolves (with no value) once the delay has elapsed.
 */
async function wait(ms) {
  await new Promise((done) => {
    setTimeout(done, ms);
  });
}
async function scrapeWithPuppeteer(url, name) {
try {
const browser = await puppeteer.launch({
headless: true,
@ -37,13 +42,36 @@ async function scrapeWithPuppeteer(url) {
const page = await browser.newPage();
await page.goto(url, { waitUntil: "networkidle2" });
let retries = 0;
while (retries < 10) {
try {
console.log(`${name} retry ${retries + 1}`);
const html = await page.content();
// Detect Cloudflare or other blocking messages
if (html.includes("Checking your browser")) {
await wait(5000);
retries++;
continue;
}
const hasItems = await page.$("li.s-card--horizontal");
if (hasItems) break;
await wait(2000);
retries++;
} catch (err) {
await wait(2000);
retries++;
}
}
// Wait for cards or detect Cloudflare
const html = await page.content();
const needBrowserCheck = html.includes("Checking your browser");
if (needBrowserCheck) {
await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null);
}
const items = await page.$$eval("li.s-card--horizontal", (nodes) => {
const results = [];
nodes.forEach((node) => {
@ -77,14 +105,14 @@ async function scrapeWithPuppeteer(url) {
const priceEl = node.querySelector(".s-card__price");
if (priceEl) {
let txt = priceEl.textContent.replace(",", "").trim();
txt = txt.replace("£", "GBP ").replace("$", "USD ");
txt = txt.replace("£", "GBP").replace("$", "USD");
const match = txt.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
if (match) {
payload.currency = match[1] || "";
payload.currency = match[1] || match[3] || "";
payload.price = match[2] || "";
if (!payload.currency?.trim()) payload.currency = "USD";
}
}
if (payload.id) results.push(payload);
});
return results;
@ -131,38 +159,67 @@ async function scrapeWithCheerio(url) {
const currency = match[1] || match[3] || "";
payload.price = match[2];
payload.currency = currency.replace("£", "GBP").replace("$", "USD");
if(!payload.currency?.trim()) payload.currency = "USD";
if (!payload.currency?.trim()) payload.currency = "USD";
}
if (payload.id) items.push(payload);
});
return items;
}
async function sendMail(subject, body, attachmentPath = null) {
const transporter = nodemailer.createTransport({
host: process.env.MAIL_HOST,
port: process.env.MAIL_PORT,
secure: true,
auth: { user: process.env.MAIL_USERNAME, pass: process.env.MAIL_PASSWORD },
});
async function sendMail(subject, body, isError = false) {
try {
await axios({
url: process.env.API_DISTI_HOST + "/api/export/items-sold-out",
method: "GET",
responseType: "arraybuffer",
timeout: 60000,
headers: {
"x-key": "CanTho#1",
},
}).then(async (response) => {
// console.log(response);
const now = new Date();
const year = now.getFullYear();
const month = String(now.getMonth() + 1).padStart(2, "0");
const day = String(now.getDate()).padStart(2, "0");
const fileName = `items_sold_out_${year}_${month}_${day}.xlsx`;
const mailOptions = {
from: process.env.MAIL_USERNAME,
to: EMAILS.join(","),
subject,
text: body,
};
const transporter = nodemailer.createTransport({
host: process.env.MAIL_HOST,
port: process.env.MAIL_PORT,
secure: true,
auth: {
user: process.env.MAIL_USERNAME,
pass: process.env.MAIL_PASSWORD,
},
connectionTimeout: 10000, // prevent timeout
pool: true, // reuse connection if many mails
});
if (attachmentPath) {
mailOptions.attachments = [{ filename: path.basename(attachmentPath), path: attachmentPath }];
const mailOptions = {
from: process.env.MAIL_USERNAME,
to: EMAILS.join(","),
subject,
text: body,
attachments: isError
? []
: [
{
filename: fileName,
content: response.data, // attach from memory
},
],
};
await transporter.sendMail(mailOptions);
console.log("✅ Email sent successfully with Excel attachment!");
});
} catch (err) {
console.error("❌ Failed to send email:", err.message);
}
await transporter.sendMail(mailOptions);
}
async function main() {
// 1⃣ Connect to MySQL
const db = await mysql.createConnection({
host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP
user: process.env.MYSQL_USER,
@ -178,8 +235,7 @@ async function main() {
for (const store of LIST_STORE) {
console.log(`Processing ${store.name}`);
let items = await scrapeWithCheerio(store.url);
if (!items) items = await scrapeWithPuppeteer(store.url);
if (!items) items = await scrapeWithPuppeteer(store.url, store.name);
let count = 0;
for (const item of items) {
// 2⃣ Check if record exists
@ -188,18 +244,19 @@ async function main() {
// 3⃣ Insert new record
const priceText = item.price || "";
const title = (item.name || "").replace("Opens in a new window or tab", "").trim();
const amount = parseFloat(priceText.replace(/[^\d.]/g, "").replace(/,/g, ""));
await db.execute(
`INSERT INTO items_sold_out (id, name, \`condition\`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
[
item.id,
item.name,
item.condition,
item.id || "",
title,
item.condition || "",
amount || 0,
item.currency,
item.link_detail,
store.name,
item.sold_out_date,
item.currency || "",
item.link_detail || "",
store.name || "",
item.sold_out_date || "",
dayjs().format("YYYY-MM-DD HH:mm:ss"),
dayjs().format("YYYY-MM-DD HH:mm:ss"),
]
@ -211,20 +268,22 @@ async function main() {
inserted.push({ name: store.name, count });
}
// 4⃣ Send email report
if (errors.length > 3) {
const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n");
await sendMail("[New Items] - Scraping Sold Out Error Report", msg);
await sendMail("[New Items] - Scraping Sold Out Error Report", msg, true);
} else {
const msg = inserted.map((i) => `Shop: ${i.name}\nInserted: ${i.count}`).join("\n\n");
const msg = inserted.map((i) => `Shop: ${i.name}\nSold: ${i.count} items`).join("\n\n");
await sendMail("[New Items] - Scraping Sold Out Success", msg);
}
console.log("✅ Done scraping.");
// 5⃣ Close MySQL connection
await db.end();
console.log("🔌 MySQL connection closed");
process.exit(0);
}
main().catch((err) => console.error(err));
// Entry point: start the run and, if main() rejects, log the error and
// terminate with exit status 1 so the failure is reflected in the exit code.
main().catch((err) => {
console.error(err);
process.exit(1);
});