SoldOut/index.js

231 lines
7.3 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import "dotenv/config";
import puppeteer from "puppeteer";
import axios from "axios";
import * as cheerio from "cheerio";
import nodemailer from "nodemailer";
import path from "path";
import dayjs from "dayjs";
import mysql from "mysql2/promise";
// Recipients of the scraping report email.
const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"];

/**
 * Build the eBay "sold items" search URL (240 results per page) for one seller.
 *
 * @param {string} sellerId - Value placed in the `_ssn` query parameter.
 * @param {string} storeName - Value placed in the `store_name` query parameter.
 * @returns {string} The search URL.
 */
function soldListingsUrl(sellerId, storeName) {
  return `https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=${sellerId}&store_name=${storeName}&_oac=1&LH_Sold=1&rt=nc&_ipg=240`;
}

// Stores to scrape, as [name, _ssn value, store_name value].
// `name` is the label that ends up in the `shop_name` column and the report.
// NOTE(review): for some stores `_ssn` and `store_name` differ from `name`;
// the values are kept exactly as they were — confirm they are intentional.
const LIST_STORE = [
  ["ocdepot", "ocdepot", "ocdepot"],
  ["itinstock", "itinstock", "itinstock"],
  ["kartechllc", "luckywolf29", "kartechllc"],
  ["g-electronic", "g-electronic", "gelectronic"],
].map(([name, sellerId, storeName]) => ({
  name,
  url: soldListingsUrl(sellerId, storeName),
}));
/**
 * Scrape sold listings from a search results page using headless Chromium.
 *
 * Only cards whose "Sold <date>" caption parses to a date within the last
 * 7 days and whose link contains an `/itm/<digits>` id are returned.
 *
 * @param {string} url - Search results page to load.
 * @returns {Promise<Array<object>|null>} Items with `id`, `link_detail`,
 *   `sold_out_date` and, when present on the card, `name`, `description`,
 *   `condition`, `price`, `currency`; `null` if launching, navigating or
 *   extracting fails (the error is logged, not thrown).
 */
async function scrapeWithPuppeteer(url) {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: ["--no-sandbox", "--disable-setuid-sandbox"],
    });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });

    // If an interstitial "Checking your browser" page was served, give it up
    // to 15s to resolve into real result cards; a timeout is not fatal here,
    // the extraction below simply finds nothing.
    const html = await page.content();
    if (html.includes("Checking your browser")) {
      await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null);
    }

    // NOTE: this callback is serialized and executed inside the page, so it
    // must not reference anything from the enclosing Node scope.
    const items = await page.$$eval("li.s-card--horizontal", (nodes) => {
      const MS_PER_DAY = 1000 * 60 * 60 * 24;
      const results = [];
      nodes.forEach((node) => {
        const payload = {};

        const soldEl = node.querySelector(".s-card__caption .su-styled-text.positive.default");
        if (!soldEl) return;
        const soldText = soldEl.textContent.trim().replace("Sold", "").trim();
        payload.sold_out_date = soldText;
        const date = new Date(soldText);
        if (Number.isNaN(date.getTime())) return;
        const daysDiff = (Date.now() - date.getTime()) / MS_PER_DAY;
        if (daysDiff > 7) return;

        const linkEl = node.querySelector("div.su-media__image a");
        if (linkEl) {
          payload.link_detail = linkEl.href;
          const idMatch = linkEl.href.match(/\/itm\/(\d+)/);
          if (idMatch) payload.id = idMatch[1];
        }

        const titleEl = node.querySelector(".s-card__title");
        if (titleEl) {
          payload.name = titleEl.textContent.replace(/New\s*listing/i, "").trim();
          payload.description = payload.name;
        }

        const conditionEl = node.querySelector(".s-card__subtitle");
        if (conditionEl) payload.condition = conditionEl.textContent.trim();

        const priceEl = node.querySelector(".s-card__price");
        if (priceEl) {
          // Strip every thousands separator, not just the first one.
          let txt = priceEl.textContent.replace(/,/g, "").trim();
          txt = txt.replace("£", "GBP ").replace("$", "USD ");
          const priceMatch = txt.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
          if (priceMatch) {
            // Accept a currency code on either side of the amount and fall
            // back to USD, mirroring what scrapeWithCheerio stores.
            payload.currency = priceMatch[1] || priceMatch[3] || "USD";
            payload.price = priceMatch[2] || "";
          }
        }

        // Cards without an item id cannot be de-duplicated later; drop them.
        if (payload.id) results.push(payload);
      });
      return results;
    });
    return items;
  } catch (error) {
    console.error("Puppeteer scraping failed:", error);
    return null;
  } finally {
    // Always release the Chromium process, including on failure paths.
    if (browser) {
      await browser.close().catch((closeError) => {
        console.error("Failed to close Puppeteer browser:", closeError);
      });
    }
  }
}
/**
 * Scrape sold listings from a search results page with a plain HTTP request
 * parsed by cheerio (no browser).
 *
 * Only cards whose "Sold <date>" caption parses to a date within the last
 * 7 days and whose link contains an `/itm/<digits>` id are returned.
 *
 * @param {string} url - Search results page to fetch.
 * @returns {Promise<Array<object>|null>} Items with `id`, `link_detail`,
 *   `sold_out_date`, `name`, `condition` and, when a price was found, `price`
 *   and `currency`; `null` when the page could not be fetched as HTML or a
 *   "Checking your browser" interstitial was served, so the caller can fall
 *   back to another scraper.
 */
async function scrapeWithCheerio(url) {
  let data;
  try {
    ({ data } = await axios.get(url));
  } catch (error) {
    // A rejected request (network error, non-2xx status) is reported as
    // "no result" rather than thrown, so one blocked request does not abort
    // the whole run.
    console.error("Cheerio scraping failed:", error?.message ?? error);
    return null;
  }
  if (typeof data !== "string" || data.includes("Checking your browser")) return null;

  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const $ = cheerio.load(data);
  const items = [];
  $("li.s-card--horizontal").each((i, el) => {
    const payload = {};

    const soldText = $(el).find(".s-card__caption .su-styled-text.positive.default").text().trim();
    if (!soldText) return;
    const soldDateText = soldText.replace("Sold", "").trim();
    payload.sold_out_date = soldDateText;
    const date = new Date(soldDateText);
    // An unparseable date yields NaN, and `NaN > 7` is false, so without this
    // guard such cards would slip through the 7-day filter.
    if (Number.isNaN(date.getTime())) return;
    const daysDiff = (Date.now() - date.getTime()) / MS_PER_DAY;
    if (daysDiff > 7) return;

    const linkEl = $(el).find("div.su-media__image a").attr("href");
    if (linkEl) {
      payload.link_detail = linkEl;
      const idMatch = linkEl.match(/\/itm\/(\d+)/);
      if (idMatch) payload.id = idMatch[1];
    }

    payload.name = $(el)
      .find(".s-card__title")
      .text()
      .replace(/New\s*listing/i, "")
      .trim();
    payload.condition = $(el).find(".s-card__subtitle").text().trim();

    // Strip every thousands separator, not just the first one.
    const priceText = $(el).find(".s-card__price").text().trim().replace(/,/g, "");
    const priceMatch = priceText.match(/([£$€A-Za-z]{1,5})?\s?([\d.,]+)\s?([£$€A-Za-z]{1,5})?/);
    if (priceMatch) {
      const currency = priceMatch[1] || priceMatch[3] || "";
      payload.price = priceMatch[2];
      payload.currency = currency.replace("£", "GBP").replace("$", "USD");
      if (!payload.currency.trim()) payload.currency = "USD";
    }

    // Cards without an item id cannot be de-duplicated later; drop them.
    if (payload.id) items.push(payload);
  });
  return items;
}
/**
 * Email a plain-text report to every address in EMAILS.
 *
 * SMTP settings are read from MAIL_HOST, MAIL_PORT, MAIL_USERNAME and
 * MAIL_PASSWORD; the connection is always opened with `secure: true`.
 *
 * @param {string} subject - Subject line.
 * @param {string} body - Plain-text message body.
 * @param {string|null} [attachmentPath=null] - Optional file to attach; the
 *   attachment is named after the file's basename.
 * @returns {Promise<void>} Resolves once the transport has accepted the mail.
 */
async function sendMail(subject, body, attachmentPath = null) {
  const { MAIL_HOST, MAIL_PORT, MAIL_USERNAME, MAIL_PASSWORD } = process.env;

  const smtp = nodemailer.createTransport({
    host: MAIL_HOST,
    port: MAIL_PORT,
    secure: true,
    auth: { user: MAIL_USERNAME, pass: MAIL_PASSWORD },
  });

  // The `attachments` key is only present when a path was supplied.
  const attachmentPart = attachmentPath
    ? { attachments: [{ filename: path.basename(attachmentPath), path: attachmentPath }] }
    : {};

  await smtp.sendMail({
    from: MAIL_USERNAME,
    to: EMAILS.join(","),
    subject,
    text: body,
    ...attachmentPart,
  });
}
/**
 * Scrape every store in LIST_STORE, insert sold items that are not yet in the
 * `items_sold_out` table, and email a summary report.
 *
 * For each store the HTTP/cheerio scraper is tried first; if it returns
 * nothing usable the Puppeteer scraper is used. A store for which both
 * scrapers fail is recorded as an error and skipped.
 *
 * @returns {Promise<void>}
 * @throws Propagates database and mail errors; the MySQL connection is closed
 *   in every case.
 */
async function main() {
  // NOTE(review): the error report is sent only when MORE than this many
  // stores produced an error. With the 4 stores visible in this file that
  // means all of them — confirm this is the intended threshold.
  const ERROR_REPORT_THRESHOLD = 3;

  // 1. Connect to MySQL
  const db = await mysql.createConnection({
    host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP
    user: process.env.MYSQL_USER,
    password: process.env.MYSQL_PASSWORD,
    database: process.env.MYSQL_DB_NAME,
  });
  console.log("✅ Connected to MySQL");

  try {
    const errors = [];
    const inserted = [];

    for (const store of LIST_STORE) {
      console.log(`Processing ${store.name}`);
      let items = await scrapeWithCheerio(store.url);
      if (!items) items = await scrapeWithPuppeteer(store.url);

      // Both scrapers can return null; iterating null would throw.
      if (!items) {
        errors.push({ url: store.url, message: "Scraping failed" });
        inserted.push({ name: store.name, count: 0 });
        continue;
      }

      let count = 0;
      for (const item of items) {
        // 2. Check if record exists
        const [rows] = await db.execute("SELECT id FROM items_sold_out WHERE id = ?", [item.id]);
        if (rows.length > 0) continue;

        // 3. Insert new record
        const amount = Number.parseFloat(String(item.price ?? "").replace(/[^\d.]/g, ""));
        const now = dayjs().format("YYYY-MM-DD HH:mm:ss");
        // Fields missing from a card are undefined; mysql2 refuses undefined
        // bind parameters, so they are sent as SQL NULL instead.
        await db.execute(
          `INSERT INTO items_sold_out (id, name, \`condition\`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
          [
            item.id,
            item.name ?? null,
            item.condition ?? null,
            Number.isNaN(amount) ? 0 : amount,
            item.currency ?? null,
            item.link_detail ?? null,
            store.name,
            // Stored as the scraped caption text with "Sold" removed.
            // NOTE(review): confirm the column accepts this format.
            item.sold_out_date ?? null,
            now,
            now,
          ]
        );
        count++;
      }

      if (count === 0) errors.push({ url: store.url, message: "No new items inserted" });
      inserted.push({ name: store.name, count });
    }

    // 4. Send email report
    if (errors.length > ERROR_REPORT_THRESHOLD) {
      const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n");
      await sendMail("[New Items] - Scraping Sold Out Error Report", msg);
    } else {
      const msg = inserted.map((i) => `Shop: ${i.name}\nInserted: ${i.count}`).join("\n\n");
      await sendMail("[New Items] - Scraping Sold Out Success", msg);
    }
    console.log("✅ Done scraping.");
  } finally {
    // 5. Close MySQL connection, on failure too, so the open socket does not
    // keep the process alive.
    await db.end();
    console.log("🔌 MySQL connection closed");
  }
}
// Entry point: run the job; on failure log the error and mark the process as
// failed so a scheduler or shell wrapper can detect it via the exit status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});