Update index.js
This commit is contained in:
parent
5237ceaba1
commit
703fbcffe7
141
index.js
141
index.js
|
|
@ -3,11 +3,11 @@ import puppeteer from "puppeteer";
|
|||
import axios from "axios";
|
||||
import * as cheerio from "cheerio";
|
||||
import nodemailer from "nodemailer";
|
||||
import path from "path";
|
||||
import dayjs from "dayjs";
|
||||
import mysql from "mysql2/promise";
|
||||
|
||||
// Recipients of the report e-mails; joined with "," into the `to` field by sendMail.
// const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"];
const EMAILS = ["andrew.ng@apactech.io"];
|
||||
|
||||
const LIST_STORE = [
|
||||
{
|
||||
|
|
@ -28,7 +28,12 @@ const LIST_STORE = [
|
|||
},
|
||||
];
|
||||
|
||||
async function scrapeWithPuppeteer(url) {
|
||||
/**
 * Pause for a given amount of time.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function wait(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
|
||||
|
||||
async function scrapeWithPuppeteer(url, name) {
|
||||
try {
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
|
|
@ -37,13 +42,36 @@ async function scrapeWithPuppeteer(url) {
|
|||
const page = await browser.newPage();
|
||||
await page.goto(url, { waitUntil: "networkidle2" });
|
||||
|
||||
let retries = 0;
|
||||
while (retries < 10) {
|
||||
try {
|
||||
console.log(`${name} retry ${retries + 1}`);
|
||||
const html = await page.content();
|
||||
|
||||
// Detect Cloudflare or other blocking messages
|
||||
if (html.includes("Checking your browser")) {
|
||||
await wait(5000);
|
||||
retries++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const hasItems = await page.$("li.s-card--horizontal");
|
||||
if (hasItems) break;
|
||||
|
||||
await wait(2000);
|
||||
retries++;
|
||||
} catch (err) {
|
||||
await wait(2000);
|
||||
retries++;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for cards or detect Cloudflare
|
||||
const html = await page.content();
|
||||
const needBrowserCheck = html.includes("Checking your browser");
|
||||
if (needBrowserCheck) {
|
||||
await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null);
|
||||
}
|
||||
|
||||
const items = await page.$$eval("li.s-card--horizontal", (nodes) => {
|
||||
const results = [];
|
||||
nodes.forEach((node) => {
|
||||
|
|
@ -77,14 +105,14 @@ async function scrapeWithPuppeteer(url) {
|
|||
const priceEl = node.querySelector(".s-card__price");
|
||||
if (priceEl) {
|
||||
let txt = priceEl.textContent.replace(",", "").trim();
|
||||
txt = txt.replace("£", "GBP ").replace("$", "USD ");
|
||||
txt = txt.replace("£", "GBP").replace("$", "USD");
|
||||
const match = txt.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
|
||||
if (match) {
|
||||
payload.currency = match[1] || "";
|
||||
payload.currency = match[1] || match[3] || "";
|
||||
payload.price = match[2] || "";
|
||||
if (!payload.currency?.trim()) payload.currency = "USD";
|
||||
}
|
||||
}
|
||||
|
||||
if (payload.id) results.push(payload);
|
||||
});
|
||||
return results;
|
||||
|
|
@ -131,38 +159,67 @@ async function scrapeWithCheerio(url) {
|
|||
const currency = match[1] || match[3] || "";
|
||||
payload.price = match[2];
|
||||
payload.currency = currency.replace("£", "GBP").replace("$", "USD");
|
||||
if(!payload.currency?.trim()) payload.currency = "USD";
|
||||
if (!payload.currency?.trim()) payload.currency = "USD";
|
||||
}
|
||||
|
||||
if (payload.id) items.push(payload);
|
||||
});
|
||||
return items;
|
||||
}
|
||||
|
||||
/**
 * E-mail a scraping report to every address in EMAILS.
 *
 * For a normal report the sold-out items export is downloaded from the
 * API_DISTI_HOST service and attached as an .xlsx file held in memory.
 * For an error report no download is attempted, so the report still goes out
 * when the export service itself is unavailable.
 *
 * Failures while downloading or sending are logged and swallowed.
 *
 * @param {string} subject - Mail subject line.
 * @param {string} body - Plain-text mail body.
 * @param {boolean} [isError=false] - True for an error report (sent without attachment).
 * @returns {Promise<void>}
 */
async function sendMail(subject, body, isError = false) {
  let transporter;
  try {
    const attachments = [];

    if (!isError) {
      const response = await axios({
        url: `${process.env.API_DISTI_HOST}/api/export/items-sold-out`,
        method: "GET",
        responseType: "arraybuffer",
        timeout: 60000,
        headers: {
          // NOTE(review): hard-coded API key — should be moved to an environment variable.
          "x-key": "CanTho#1",
        },
      });

      attachments.push({
        // Local-date stamped name, e.g. items_sold_out_2025_01_31.xlsx
        filename: `items_sold_out_${dayjs().format("YYYY_MM_DD")}.xlsx`,
        content: response.data, // attach from memory
      });
    }

    transporter = nodemailer.createTransport({
      host: process.env.MAIL_HOST,
      port: process.env.MAIL_PORT,
      secure: true,
      auth: {
        user: process.env.MAIL_USERNAME,
        pass: process.env.MAIL_PASSWORD,
      },
      connectionTimeout: 10000, // prevent timeout
      pool: true, // reuse connection if many mails
    });

    await transporter.sendMail({
      from: process.env.MAIL_USERNAME,
      to: EMAILS.join(","),
      subject,
      text: body,
      attachments,
    });

    console.log(
      isError
        ? "✅ Error report email sent successfully!"
        : "✅ Email sent successfully with Excel attachment!"
    );
  } catch (err) {
    console.error("❌ Failed to send email:", err.message);
  } finally {
    // A pooled transporter keeps its connections open until closed explicitly.
    transporter?.close();
  }
}
|
||||
|
||||
async function main() {
|
||||
// 1️⃣ Connect to MySQL
|
||||
const db = await mysql.createConnection({
|
||||
host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP
|
||||
user: process.env.MYSQL_USER,
|
||||
|
|
@ -178,8 +235,7 @@ async function main() {
|
|||
for (const store of LIST_STORE) {
|
||||
console.log(`Processing ${store.name}`);
|
||||
let items = await scrapeWithCheerio(store.url);
|
||||
if (!items) items = await scrapeWithPuppeteer(store.url);
|
||||
|
||||
if (!items) items = await scrapeWithPuppeteer(store.url, store.name);
|
||||
let count = 0;
|
||||
for (const item of items) {
|
||||
// 2️⃣ Check if record exists
|
||||
|
|
@ -188,18 +244,19 @@ async function main() {
|
|||
|
||||
// 3️⃣ Insert new record
|
||||
const priceText = item.price || "";
|
||||
const title = (item.name || "").replace("Opens in a new window or tab", "").trim();
|
||||
const amount = parseFloat(priceText.replace(/[^\d.]/g, "").replace(/,/g, ""));
|
||||
await db.execute(
|
||||
`INSERT INTO items_sold_out (id, name, \`condition\`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
[
|
||||
item.id,
|
||||
item.name,
|
||||
item.condition,
|
||||
item.id || "",
|
||||
title,
|
||||
item.condition || "",
|
||||
amount || 0,
|
||||
item.currency,
|
||||
item.link_detail,
|
||||
store.name,
|
||||
item.sold_out_date,
|
||||
item.currency || "",
|
||||
item.link_detail || "",
|
||||
store.name || "",
|
||||
item.sold_out_date || "",
|
||||
dayjs().format("YYYY-MM-DD HH:mm:ss"),
|
||||
dayjs().format("YYYY-MM-DD HH:mm:ss"),
|
||||
]
|
||||
|
|
@ -211,20 +268,22 @@ async function main() {
|
|||
inserted.push({ name: store.name, count });
|
||||
}
|
||||
|
||||
// 4️⃣ Send email report
|
||||
if (errors.length > 3) {
|
||||
const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n");
|
||||
await sendMail("[New Items] - Scraping Sold Out Error Report", msg);
|
||||
await sendMail("[New Items] - Scraping Sold Out Error Report", msg, true);
|
||||
} else {
|
||||
const msg = inserted.map((i) => `Shop: ${i.name}\nInserted: ${i.count}`).join("\n\n");
|
||||
const msg = inserted.map((i) => `Shop: ${i.name}\nSold: ${i.count} items`).join("\n\n");
|
||||
await sendMail("[New Items] - Scraping Sold Out Success", msg);
|
||||
}
|
||||
|
||||
console.log("✅ Done scraping.");
|
||||
|
||||
// 5️⃣ Close MySQL connection
|
||||
await db.end();
|
||||
console.log("🔌 MySQL connection closed");
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
// Entry point: run the scraper once. Any rejection that escapes main() is
// logged and the process exits with a non-zero status so the failure is
// visible to whatever launched the script.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||
|
|
|
|||
Loading…
Reference in New Issue