SoldOut/index.js

290 lines
8.9 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import "dotenv/config";
import puppeteer from "puppeteer";
import axios from "axios";
import * as cheerio from "cheerio";
import nodemailer from "nodemailer";
import dayjs from "dayjs";
import mysql from "mysql2/promise";
// Recipients of the report e-mail (joined into the "to" header by sendMail).
// Alternative recipient list, currently disabled:
// const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"];
const EMAILS = ["andrew.ng@apactech.io"];
// eBay sellers to monitor. `name` is the label stored in the shop_name column and
// shown in the report e-mail; `url` is the seller search page that gets scraped
// (presumably LH_Sold=1 restricts it to sold listings and _ipg=240 sets the page
// size — confirm against eBay's URL parameters).
const LIST_STORE = [
{
name: "ocdepot",
url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=ocdepot&store_name=ocdepot&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
},
{
name: "itinstock",
url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=itinstock&store_name=itinstock&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
},
{
// NOTE(review): _ssn is "luckywolf29" while store_name is "kartechllc" — confirm this is intentional.
name: "kartechllc",
url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=luckywolf29&store_name=kartechllc&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
},
{
// NOTE(review): _ssn is "g-electronic" while store_name is "gelectronic" — confirm this is intentional.
name: "g-electronic",
url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=g-electronic&store_name=gelectronic&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
},
];
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
async function wait(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Scrape a seller's sold-listings page with headless Chromium.
 *
 * Loads `url`, polls up to 10 times until listing cards
 * (`li.s-card--horizontal`) are present or the "Checking your browser"
 * interstitial is gone, then extracts every card sold within the last 7 days.
 *
 * The browser is always closed, including when navigation or extraction
 * throws, so a failed run does not leave a Chromium process behind.
 *
 * @param {string} url - Search page to load.
 * @param {string} name - Store label, used only in log output.
 * @returns {Promise<Array<object>|null>} Extracted items (each has at least an
 *   `id`), or `null` if launching, navigating or extracting failed.
 */
async function scrapeWithPuppeteer(url, name) {
  let browser = null;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: ["--no-sandbox", "--disable-setuid-sandbox"],
    });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });

    // Poll until the listing cards show up; the blocking interstitial gets a longer pause.
    let retries = 0;
    while (retries < 10) {
      try {
        console.log(`${name} retry ${retries + 1}`);
        const html = await page.content();
        // Detect Cloudflare or other blocking messages
        if (html.includes("Checking your browser")) {
          await wait(5000);
          retries++;
          continue;
        }
        const hasItems = await page.$("li.s-card--horizontal");
        if (hasItems) break;
        await wait(2000);
        retries++;
      } catch (err) {
        // Keep polling, but leave a trace of why this attempt failed.
        console.warn(`${name} poll attempt ${retries + 1} failed:`, err?.message ?? err);
        await wait(2000);
        retries++;
      }
    }

    // Last chance: if the interstitial is still displayed, give the cards 15 more seconds.
    const html = await page.content();
    const needBrowserCheck = html.includes("Checking your browser");
    if (needBrowserCheck) {
      await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null);
    }

    // This callback is evaluated inside the page, so it can only use what it defines itself.
    const items = await page.$$eval("li.s-card--horizontal", (nodes) => {
      const results = [];
      nodes.forEach((node) => {
        const payload = {};

        const soldEl = node.querySelector(".s-card__caption .su-styled-text.positive.default");
        if (!soldEl) return;
        const soldText = soldEl.textContent.trim().replace("Sold", "").trim();
        payload.sold_out_date = soldText;
        const date = new Date(soldText);
        if (Number.isNaN(date.getTime())) return;
        const daysDiff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
        if (daysDiff > 7) return;

        const linkEl = node.querySelector("div.su-media__image a");
        if (linkEl) {
          payload.link_detail = linkEl.href;
          const idMatch = linkEl.href.match(/\/itm\/(\d+)/);
          if (idMatch) payload.id = idMatch[1];
        }

        const titleEl = node.querySelector(".s-card__title");
        if (titleEl) {
          payload.name = titleEl.textContent.replace(/New\s*listing/i, "").trim();
          payload.description = payload.name;
        }

        const conditionEl = node.querySelector(".s-card__subtitle");
        if (conditionEl) payload.condition = conditionEl.textContent.trim();

        const priceEl = node.querySelector(".s-card__price");
        if (priceEl) {
          // Strip every thousands separator, not just the first one.
          let txt = priceEl.textContent.replace(/,/g, "").trim();
          txt = txt.replace("£", "GBP").replace("$", "USD");
          const priceMatch = txt.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
          if (priceMatch) {
            payload.currency = priceMatch[1] || priceMatch[3] || "";
            payload.price = priceMatch[2] || "";
            if (!payload.currency?.trim()) payload.currency = "USD";
          }
        }

        // Cards without an item id cannot be de-duplicated later, so they are dropped.
        if (payload.id) results.push(payload);
      });
      return results;
    });

    return items;
  } catch (error) {
    console.error("Puppeteer scraping failed:", error);
    return null;
  } finally {
    if (browser) {
      await browser.close().catch((closeErr) => {
        console.error("Failed to close Puppeteer browser:", closeErr);
      });
    }
  }
}
/**
 * Scrape a seller's sold-listings page with a plain HTTP request and cheerio.
 *
 * Returns `null` whenever the page cannot be used — the request failed, the
 * body is not HTML text, or it contains the "Checking your browser"
 * interstitial. The caller treats a falsy result as the signal to fall back
 * to the Puppeteer scraper.
 *
 * @param {string} url - Search page to fetch.
 * @returns {Promise<Array<object>|null>} Items sold within the last 7 days
 *   (each has at least an `id`), or `null` if the page could not be scraped.
 */
async function scrapeWithCheerio(url) {
  let data;
  try {
    ({ data } = await axios.get(url));
  } catch (err) {
    // A blocked or failed request should trigger the fallback, not abort the whole run.
    console.error("Cheerio scraping request failed:", err?.message ?? err);
    return null;
  }
  if (typeof data !== "string" || data.includes("Checking your browser")) return null;

  const $ = cheerio.load(data);
  const items = [];
  $("li.s-card--horizontal").each((i, el) => {
    const card = $(el);
    const payload = {};

    const soldText = card.find(".s-card__caption .su-styled-text.positive.default").text().trim();
    if (!soldText) return;
    const soldDate = soldText.replace("Sold", "").trim();
    payload.sold_out_date = soldDate;
    const date = new Date(soldDate);
    // Same guard as the Puppeteer path: without it an unparsable date yields NaN,
    // and `NaN > 7` is false, so the card would slip through the age filter.
    if (Number.isNaN(date.getTime())) return;
    const daysDiff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
    if (daysDiff > 7) return;

    const href = card.find("div.su-media__image a").attr("href");
    if (href) {
      payload.link_detail = href;
      const idMatch = href.match(/\/itm\/(\d+)/);
      if (idMatch) payload.id = idMatch[1];
    }

    payload.name = card
      .find(".s-card__title")
      .text()
      .replace(/New\s*listing/i, "")
      .trim();
    payload.condition = card.find(".s-card__subtitle").text().trim();

    // Strip every thousands separator, not just the first one.
    const priceText = card.find(".s-card__price").text().trim().replace(/,/g, "");
    const priceMatch = priceText.match(/([£$€A-Za-z]{1,5})?\s?([\d.,]+)\s?([£$€A-Za-z]{1,5})?/);
    if (priceMatch) {
      const currency = priceMatch[1] || priceMatch[3] || "";
      payload.price = priceMatch[2];
      payload.currency = currency.replace("£", "GBP").replace("$", "USD");
      if (!payload.currency?.trim()) payload.currency = "USD";
    }

    // Cards without an item id cannot be de-duplicated later, so they are dropped.
    if (payload.id) items.push(payload);
  });
  return items;
}
/**
 * Send the run report by e-mail to everyone in EMAILS.
 *
 * For a normal report the sold-out export is downloaded from the DISTI API
 * and attached as an .xlsx file. For an error report (`isError` true) nothing
 * is downloaded, so an export API outage cannot suppress the error e-mail.
 *
 * Failures are logged and swallowed: this function never rejects.
 *
 * @param {string} subject - Mail subject.
 * @param {string} body - Plain-text mail body.
 * @param {boolean} [isError=false] - When true, send without the Excel attachment.
 * @returns {Promise<void>}
 */
async function sendMail(subject, body, isError = false) {
  let transporter = null;
  try {
    const attachments = [];
    if (!isError) {
      const response = await axios({
        url: `${process.env.API_DISTI_HOST}/api/export/items-sold-out`,
        method: "GET",
        responseType: "arraybuffer",
        timeout: 60000,
        headers: {
          // NOTE(review): hard-coded API key committed to source — move it to an
          // environment variable and rotate it.
          "x-key": "CanTho#1",
        },
      });

      const now = new Date();
      const year = now.getFullYear();
      const month = String(now.getMonth() + 1).padStart(2, "0");
      const day = String(now.getDate()).padStart(2, "0");
      attachments.push({
        filename: `items_sold_out_${year}_${month}_${day}.xlsx`,
        content: response.data, // attach from memory
      });
    }

    transporter = nodemailer.createTransport({
      host: process.env.MAIL_HOST,
      port: process.env.MAIL_PORT,
      secure: true,
      auth: {
        user: process.env.MAIL_USERNAME,
        pass: process.env.MAIL_PASSWORD,
      },
      connectionTimeout: 10000, // prevent timeout
      pool: true, // reuse connection if many mails
    });

    await transporter.sendMail({
      from: process.env.MAIL_USERNAME,
      to: EMAILS.join(","),
      subject,
      text: body,
      attachments,
    });
    console.log(
      isError
        ? "✅ Error report email sent successfully!"
        : "✅ Email sent successfully with Excel attachment!"
    );
  } catch (err) {
    console.error("❌ Failed to send email:", err.message);
  } finally {
    // A pooled transport keeps its SMTP connections open until it is closed explicitly.
    transporter?.close();
  }
}
/**
 * Run one full scrape: for every store in LIST_STORE, fetch its recently sold
 * items (cheerio first, Puppeteer as fallback), insert the ones not yet in the
 * `items_sold_out` table, then e-mail a summary and exit the process.
 *
 * A store whose scrape fails in both scrapers is recorded as an error and
 * skipped instead of aborting the run. The MySQL connection is closed even if
 * an error is thrown part-way through.
 *
 * @returns {Promise<void>} Does not resolve on success — the process exits with
 *   code 0. Rejects if connecting to MySQL or a query fails.
 */
async function main() {
  const db = await mysql.createConnection({
    host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP
    user: process.env.MYSQL_USER,
    password: process.env.MYSQL_PASSWORD,
    database: process.env.MYSQL_DB_NAME,
  });
  console.log("✅ Connected to MySQL");

  try {
    const errors = [];
    const inserted = [];

    for (const store of LIST_STORE) {
      console.log(`Processing ${store.name}`);

      // 1. Scrape: lightweight HTTP first, headless browser as fallback.
      let items = await scrapeWithCheerio(store.url);
      if (!items) items = await scrapeWithPuppeteer(store.url, store.name);
      if (!items) {
        // Both scrapers failed; iterating over null would throw and kill the whole run.
        errors.push({ url: store.url, message: "Scraping failed" });
        inserted.push({ name: store.name, count: 0 });
        continue;
      }

      let count = 0;
      for (const item of items) {
        // 2. Skip items that are already stored.
        const [rows] = await db.execute("SELECT id FROM items_sold_out WHERE id = ?", [item.id]);
        if (rows.length > 0) continue;

        // 3. Insert the new record.
        const priceText = item.price || "";
        const title = (item.name || "").replace("Opens in a new window or tab", "").trim();
        // Keeping only digits and dots also removes any thousands separators.
        const amount = parseFloat(priceText.replace(/[^\d.]/g, ""));
        const timestamp = dayjs().format("YYYY-MM-DD HH:mm:ss");
        await db.execute(
          `INSERT INTO items_sold_out (id, name, \`condition\`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
          [
            item.id || "",
            title,
            item.condition || "",
            Number.isFinite(amount) ? amount : 0,
            item.currency || "",
            item.link_detail || "",
            store.name || "",
            item.sold_out_date || "",
            timestamp,
            timestamp,
          ]
        );
        count++;
      }

      if (count === 0) errors.push({ url: store.url, message: "No new items inserted" });
      inserted.push({ name: store.name, count });
    }

    // At most one error is recorded per store, so "> 3" fires only when at least
    // four stores produced nothing.
    // NOTE(review): confirm this threshold is meant to track the number of stores.
    if (errors.length > 3) {
      const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n");
      await sendMail("[New Items] - Scraping Sold Out Error Report", msg, true);
    } else {
      const msg = inserted.map((i) => `Shop: ${i.name}\nSold: ${i.count} items`).join("\n\n");
      await sendMail("[New Items] - Scraping Sold Out Success", msg);
    }
    console.log("✅ Done scraping.");
  } finally {
    await db.end();
    console.log("🔌 MySQL connection closed");
  }

  process.exit(0);
}
// Script entry point: run the scrape and turn any unhandled failure into a
// logged error plus a non-zero exit code.
main().catch((fatal) => {
  console.error(fatal);
  process.exit(1);
});