import "dotenv/config";
import puppeteer from "puppeteer";
import axios from "axios";
import * as cheerio from "cheerio";
import nodemailer from "nodemailer";
import path from "path";
import dayjs from "dayjs";
import mysql from "mysql2/promise";

/** Recipients of the scraping report e-mail. */
const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"];

/** Stores to scrape: a display name plus the search URL that is fetched for it. */
const LIST_STORE = [
  {
    name: "ocdepot",
    url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=ocdepot&store_name=ocdepot&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
  },
  {
    name: "itinstock",
    url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=itinstock&store_name=itinstock&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
  },
  {
    name: "kartechllc",
    url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=luckywolf29&store_name=kartechllc&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
  },
  {
    name: "g-electronic",
    url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=g-electronic&store_name=gelectronic&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
  },
];

/** Cards whose sold date is more than this many days in the past are skipped. */
const MAX_SOLD_AGE_DAYS = 7;

/** Milliseconds in one day, used to turn a timestamp difference into days. */
const MS_PER_DAY = 1000 * 60 * 60 * 24;

/**
 * Scrapes sold-item cards from a search page using a headless browser.
 *
 * @param {string} url - Search page to load.
 * @returns {Promise<Array<object>|null>} Parsed cards (only those carrying an
 *   item id), or `null` when anything in the browser session failed.
 */
async function scrapeWithPuppeteer(url) {
  let browser = null;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: ["--no-sandbox", "--disable-setuid-sandbox"],
    });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });

    // Wait for cards or detect Cloudflare
    const html = await page.content();
    const needBrowserCheck = html.includes("Checking your browser");
    if (needBrowserCheck) {
      // Best effort: a timeout here is ignored and we parse whatever is present.
      await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null);
    }

    // The callback is executed inside the page, so it cannot close over
    // module-level constants; the age limit is passed in as an argument.
    const items = await page.$$eval(
      "li.s-card--horizontal",
      (nodes, maxAgeDays) => {
        const results = [];
        nodes.forEach((node) => {
          const payload = {};

          const soldEl = node.querySelector(".s-card__caption .su-styled-text.positive.default");
          if (!soldEl) return;
          const soldText = soldEl.textContent.trim().replace("Sold", "").trim();
          payload.sold_out_date = soldText;

          // Skip cards whose date cannot be parsed or that are too old.
          const date = new Date(soldText);
          if (Number.isNaN(date.getTime())) return;
          const daysDiff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
          if (daysDiff > maxAgeDays) return;

          const linkEl = node.querySelector("div.su-media__image a");
          if (linkEl) {
            payload.link_detail = linkEl.href;
            const idMatch = linkEl.href.match(/\/itm\/(\d+)/);
            if (idMatch) payload.id = idMatch[1];
          }

          const titleEl = node.querySelector(".s-card__title");
          if (titleEl) {
            payload.name = titleEl.textContent.replace(/New\s*listing/i, "").trim();
            payload.description = payload.name;
          }

          const conditionEl = node.querySelector(".s-card__subtitle");
          if (conditionEl) payload.condition = conditionEl.textContent.trim();

          const priceEl = node.querySelector(".s-card__price");
          if (priceEl) {
            // Drop every thousands separator, not just the first one.
            let txt = priceEl.textContent.replace(/,/g, "").trim();
            txt = txt.replace("£", "GBP ").replace("$", "USD ");
            const priceMatch = txt.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
            if (priceMatch) {
              payload.currency = priceMatch[1] || "";
              payload.price = priceMatch[2] || "";
            }
          }

          // Without an id the row cannot be de-duplicated or stored.
          if (payload.id) results.push(payload);
        });
        return results;
      },
      MAX_SOLD_AGE_DAYS
    );

    return items;
  } catch (error) {
    console.error("Puppeteer scraping failed:", error);
    return null;
  } finally {
    // Always release the browser process, including when navigation or
    // evaluation threw; a failing close must not discard scraped items.
    if (browser) {
      await browser.close().catch((closeError) => {
        console.error("Failed to close browser:", closeError);
      });
    }
  }
}

/**
 * Scrapes sold-item cards from a search page with a plain HTTP request.
 *
 * @param {string} url - Search page to fetch.
 * @returns {Promise<Array<object>|null>} Parsed cards (only those carrying an
 *   item id), or `null` when the request failed, the body is not text, or the
 *   body contains the "Checking your browser" interstitial — `null` tells the
 *   caller to fall back to the headless-browser scraper.
 */
async function scrapeWithCheerio(url) {
  let data;
  try {
    ({ data } = await axios.get(url));
  } catch (error) {
    // A rejected request (non-2xx status, network error) should trigger the
    // caller's fallback rather than abort the whole run.
    console.error("HTTP scraping failed:", error);
    return null;
  }

  if (typeof data !== "string") return null;
  if (data.includes("Checking your browser")) return null;

  const $ = cheerio.load(data);
  const items = [];

  $("li.s-card--horizontal").each((i, el) => {
    const payload = {};
    const card = $(el);

    const soldText = card.find(".s-card__caption .su-styled-text.positive.default").text().trim();
    if (!soldText) return;
    const soldDateText = soldText.replace("Sold", "").trim();
    payload.sold_out_date = soldDateText;

    // Skip cards whose date cannot be parsed (NaN would otherwise slip past
    // the age comparison below) or that are too old.
    const date = new Date(soldDateText);
    if (Number.isNaN(date.getTime())) return;
    const daysDiff = (Date.now() - date.getTime()) / MS_PER_DAY;
    if (daysDiff > MAX_SOLD_AGE_DAYS) return;

    const href = card.find("div.su-media__image a").attr("href");
    if (href) {
      payload.link_detail = href;
      const idMatch = href.match(/\/itm\/(\d+)/);
      if (idMatch) payload.id = idMatch[1];
    }

    payload.name = card
      .find(".s-card__title")
      .text()
      .replace(/New\s*listing/i, "")
      .trim();
    payload.condition = card.find(".s-card__subtitle").text().trim();

    // Drop every thousands separator, not just the first one.
    const priceText = card.find(".s-card__price").text().trim().replace(/,/g, "");
    const priceMatch = priceText.match(/([£$€A-Za-z]{1,5})?\s?([\d.,]+)\s?([£$€A-Za-z]{1,5})?/);
    if (priceMatch) {
      const currency = priceMatch[1] || priceMatch[3] || "";
      payload.price = priceMatch[2];
      payload.currency = currency.replace("£", "GBP").replace("$", "USD");
      if (!payload.currency?.trim()) payload.currency = "USD";
    }

    // Without an id the row cannot be de-duplicated or stored.
    if (payload.id) items.push(payload);
  });

  return items;
}

/**
 * Sends a plain-text e-mail to every address in `EMAILS`.
 *
 * SMTP settings are read from MAIL_HOST, MAIL_PORT, MAIL_USERNAME and
 * MAIL_PASSWORD.
 *
 * @param {string} subject - Mail subject.
 * @param {string} body - Plain-text mail body.
 * @param {string|null} [attachmentPath=null] - Optional file to attach; its
 *   base name is used as the attachment file name.
 * @returns {Promise<void>}
 */
async function sendMail(subject, body, attachmentPath = null) {
  const transporter = nodemailer.createTransport({
    host: process.env.MAIL_HOST,
    port: process.env.MAIL_PORT,
    secure: true,
    auth: { user: process.env.MAIL_USERNAME, pass: process.env.MAIL_PASSWORD },
  });

  const mailOptions = {
    from: process.env.MAIL_USERNAME,
    to: EMAILS.join(","),
    subject,
    text: body,
  };

  if (attachmentPath) {
    mailOptions.attachments = [{ filename: path.basename(attachmentPath), path: attachmentPath }];
  }

  await transporter.sendMail(mailOptions);
}

/**
 * Scrapes every store in `LIST_STORE`, inserts items not yet present in
 * `items_sold_out`, and e-mails a summary report.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // 1. Connect to MySQL
  const db = await mysql.createConnection({
    host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP
    user: process.env.MYSQL_USER,
    password: process.env.MYSQL_PASSWORD,
    database: process.env.MYSQL_DB_NAME,
  });
  console.log("✅ Connected to MySQL");

  try {
    const errors = [];
    const inserted = [];

    for (const store of LIST_STORE) {
      console.log(`Processing ${store.name}`);

      // Try the cheap HTTP scraper first; fall back to the headless browser.
      let items = await scrapeWithCheerio(store.url);
      if (!items) items = await scrapeWithPuppeteer(store.url);

      if (!items) {
        // Both scrapers failed: report it and move on to the next store
        // instead of crashing on a null iterable.
        errors.push({ url: store.url, message: "Scraping failed" });
        inserted.push({ name: store.name, count: 0 });
        continue;
      }

      let count = 0;
      for (const item of items) {
        // 2. Check if record exists
        const [rows] = await db.execute("SELECT id FROM items_sold_out WHERE id = ?", [item.id]);
        if (rows.length > 0) continue;

        // 3. Insert new record
        const priceText = item.price || "";
        const amount = parseFloat(priceText.replace(/[^\d.]/g, ""));
        const now = dayjs().format("YYYY-MM-DD HH:mm:ss");

        // Scraped fields may be missing; mysql2 rejects `undefined` bind
        // parameters, so absent values are stored as SQL NULL.
        await db.execute(
          `INSERT INTO items_sold_out (id, name, \`condition\`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
          [
            item.id,
            item.name ?? null,
            item.condition ?? null,
            Number.isNaN(amount) ? 0 : amount,
            item.currency ?? null,
            item.link_detail ?? null,
            store.name,
            item.sold_out_date ?? null,
            now,
            now,
          ]
        );
        count++;
      }

      if (count === 0) errors.push({ url: store.url, message: "No new items inserted" });
      inserted.push({ name: store.name, count });
    }

    // 4. Send email report
    // The error report is only sent when more than 3 stores produced an
    // error; with the 4 stores configured above that means all of them.
    if (errors.length > 3) {
      const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n");
      await sendMail("[New Items] - Scraping Sold Out Error Report", msg);
    } else {
      const msg = inserted.map((i) => `Shop: ${i.name}\nInserted: ${i.count}`).join("\n\n");
      await sendMail("[New Items] - Scraping Sold Out Success", msg);
    }

    console.log("✅ Done scraping.");
  } finally {
    // 5. Close MySQL connection, including when scraping or mailing failed.
    await db.end();
    console.log("🔌 MySQL connection closed");
  }
}

main().catch((err) => console.error(err));