import "dotenv/config"; import puppeteer from "puppeteer"; import axios from "axios"; import * as cheerio from "cheerio"; import nodemailer from "nodemailer"; import dayjs from "dayjs"; import mysql from "mysql2/promise"; // const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"]; const EMAILS = ["andrew.ng@apactech.io"]; const LIST_STORE = [ { name: "ocdepot", url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=ocdepot&store_name=ocdepot&_oac=1&LH_Sold=1&rt=nc&_ipg=240", }, { name: "itinstock", url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=itinstock&store_name=itinstock&_oac=1&LH_Sold=1&rt=nc&_ipg=240", }, { name: "kartechllc", url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=luckywolf29&store_name=kartechllc&_oac=1&LH_Sold=1&rt=nc&_ipg=240", }, { name: "g-electronic", url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=g-electronic&store_name=gelectronic&_oac=1&LH_Sold=1&rt=nc&_ipg=240", }, ]; // Define function promise waiting for a given time async function wait(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } async function scrapeWithPuppeteer(url, name) { try { const browser = await puppeteer.launch({ headless: true, args: ["--no-sandbox", "--disable-setuid-sandbox"], }); const page = await browser.newPage(); await page.goto(url, { waitUntil: "networkidle2" }); let retries = 0; while (retries < 10) { try { console.log(`${name} retry ${retries + 1}`); const html = await page.content(); // Detect Cloudflare or other blocking messages if (html.includes("Checking your browser")) { await wait(5000); retries++; continue; } const hasItems = await page.$("li.s-card--horizontal"); if (hasItems) break; await wait(2000); retries++; } catch (err) { await wait(2000); retries++; } } // Wait for cards or detect Cloudflare const html = await page.content(); const needBrowserCheck = html.includes("Checking your browser"); if (needBrowserCheck) { await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null); } const items = await page.$$eval("li.s-card--horizontal", (nodes) => { const results = []; nodes.forEach((node) => { const payload = {}; const soldEl = node.querySelector(".s-card__caption .su-styled-text.positive.default"); if (!soldEl) return; const soldText = soldEl.textContent.trim().replace("Sold", "").trim(); payload.sold_out_date = soldText; const date = new Date(soldText); if (isNaN(date.getTime())) return; const daysDiff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24); if (daysDiff > 7) return; const linkEl = node.querySelector("div.su-media__image a"); if (linkEl) { payload.link_detail = linkEl.href; const match = linkEl.href.match(/\/itm\/(\d+)/); if (match) payload.id = match[1]; } const titleEl = node.querySelector(".s-card__title"); if (titleEl) { payload.name = titleEl.textContent.replace(/New\s*listing/i, "").trim(); payload.description = payload.name; } const conditionEl = node.querySelector(".s-card__subtitle"); if (conditionEl) payload.condition = conditionEl.textContent.trim(); const priceEl = node.querySelector(".s-card__price"); if (priceEl) { let txt = priceEl.textContent.replace(",", "").trim(); txt = txt.replace("£", "GBP").replace("$", "USD"); const match = txt.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/); if (match) { payload.currency = match[1] || match[3] || ""; payload.price = match[2] || ""; if (!payload.currency?.trim()) payload.currency = "USD"; } } if (payload.id) results.push(payload); }); return results; }); await browser.close(); return items; } catch (error) { console.error("Puppeteer scraping failed:", error); return null; } } async function scrapeWithCheerio(url) { const { data } = await axios.get(url); if (data.includes("Checking your browser")) return null; const $ = cheerio.load(data); const items = []; $("li.s-card--horizontal").each((i, el) => { const payload = {}; const soldText = $(el).find(".s-card__caption .su-styled-text.positive.default").text().trim(); if (!soldText) return; payload.sold_out_date = soldText.replace("Sold", "").trim(); const date = new Date(soldText.replace("Sold", "").trim()); const daysDiff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24); if (daysDiff > 7) return; const linkEl = $(el).find("div.su-media__image a").attr("href"); if (linkEl) { payload.link_detail = linkEl; const match = linkEl.match(/\/itm\/(\d+)/); if (match) payload.id = match[1]; } payload.name = $(el) .find(".s-card__title") .text() .replace(/New\s*listing/i, "") .trim(); payload.condition = $(el).find(".s-card__subtitle").text().trim(); const priceText = $(el).find(".s-card__price").text().trim().replace(",", ""); const match = priceText.match(/([£$€A-Za-z]{1,5})?\s?([\d.,]+)\s?([£$€A-Za-z]{1,5})?/); if (match) { const currency = match[1] || match[3] || ""; payload.price = match[2]; payload.currency = currency.replace("£", "GBP").replace("$", "USD"); if (!payload.currency?.trim()) payload.currency = "USD"; } if (payload.id) items.push(payload); }); return items; } async function sendMail(subject, body, isError = false) { try { await axios({ url: process.env.API_DISTI_HOST + "/api/export/items-sold-out", method: "GET", responseType: "arraybuffer", timeout: 60000, headers: { "x-key": "CanTho#1", }, }).then(async (response) => { // console.log(response); const now = new Date(); const year = now.getFullYear(); const month = String(now.getMonth() + 1).padStart(2, "0"); const day = String(now.getDate()).padStart(2, "0"); const fileName = `items_sold_out_${year}_${month}_${day}.xlsx`; const transporter = nodemailer.createTransport({ host: process.env.MAIL_HOST, port: process.env.MAIL_PORT, secure: true, auth: { user: process.env.MAIL_USERNAME, pass: process.env.MAIL_PASSWORD, }, connectionTimeout: 10000, // prevent timeout pool: true, // reuse connection if many mails }); const mailOptions = { from: process.env.MAIL_USERNAME, to: EMAILS.join(","), subject, text: body, attachments: isError ? [] : [ { filename: fileName, content: response.data, // attach from memory }, ], }; await transporter.sendMail(mailOptions); console.log("✅ Email sent successfully with Excel attachment!"); }); } catch (err) { console.error("❌ Failed to send email:", err.message); } } async function main() { const db = await mysql.createConnection({ host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP user: process.env.MYSQL_USER, password: process.env.MYSQL_PASSWORD, database: process.env.MYSQL_DB_NAME, }); console.log("✅ Connected to MySQL"); const errors = []; const inserted = []; for (const store of LIST_STORE) { console.log(`Processing ${store.name}`); let items = await scrapeWithCheerio(store.url); if (!items) items = await scrapeWithPuppeteer(store.url, store.name); let count = 0; for (const item of items) { // 2️⃣ Check if record exists const [rows] = await db.execute("SELECT id FROM items_sold_out WHERE id = ?", [item.id]); if (rows.length > 0) continue; // 3️⃣ Insert new record const priceText = item.price || ""; const title = (item.name || "").replace("Opens in a new window or tab", "").trim(); const amount = parseFloat(priceText.replace(/[^\d.]/g, "").replace(/,/g, "")); await db.execute( `INSERT INTO items_sold_out (id, name, \`condition\`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [ item.id || "", title, item.condition || "", amount || 0, item.currency || "", item.link_detail || "", store.name || "", item.sold_out_date || "", dayjs().format("YYYY-MM-DD HH:mm:ss"), dayjs().format("YYYY-MM-DD HH:mm:ss"), ] ); count++; } if (count === 0) errors.push({ url: store.url, message: "No new items inserted" }); inserted.push({ name: store.name, count }); } if (errors.length > 3) { const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n"); await sendMail("[New Items] - Scraping Sold Out Error Report", msg, true); } else { const msg = inserted.map((i) => `Shop: ${i.name}\nSold: ${i.count} items`).join("\n\n"); await sendMail("[New Items] - Scraping Sold Out Success", msg); } console.log("✅ Done scraping."); await db.end(); console.log("🔌 MySQL connection closed"); process.exit(0); } main().catch((err) => { console.error(err); process.exit(1); });