From 58d48095482aa9aac8fe48b6feada1f50e9869cf Mon Sep 17 00:00:00 2001
From: Admin
Date: Tue, 27 May 2025 17:00:58 +0700
Subject: [PATCH] update scrapte all bid

---
 .../models/allbids-scrap-model.js             | 142 +++++-------------
 1 file changed, 41 insertions(+), 101 deletions(-)

diff --git a/scrape-data-keyword/models/allbids-scrap-model.js b/scrape-data-keyword/models/allbids-scrap-model.js
index 9c8a2f7..a7d11d4 100644
--- a/scrape-data-keyword/models/allbids-scrap-model.js
+++ b/scrape-data-keyword/models/allbids-scrap-model.js
@@ -1,3 +1,4 @@
+import axios from "axios";
 import browser from "../system/browser.js";
 import { extractModelId, extractNumber } from "../system/ultils.js";
 import { ScrapModel } from "./scrap-model.js";
@@ -6,114 +7,53 @@ export class AllbidsScrapModel extends ScrapModel {
   action = async () => {
     const urlsData = this.extractUrls();
 
+    console.log({ urlsData });
+
     for (let item of urlsData) {
-      await this.page.goto(item.url);
+      // await this.page.goto(item.url);
 
-      const data = await this.getItemsInHtml(item);
+      // const data = await this.getItemsInHtml(item);
 
-      const results = this.filterItemByKeyword(item.keyword, data);
+      // const results = this.filterItemByKeyword(item.keyword, data);
+
+      // this.results[item.keyword] = results;
+
+      // console.log({ results: this.results });
+
+      const { data } = await axios({
+        url: item.url,
+        method: "POST",
+        data: {
+          top: 50,
+          skip: 0,
+          sort: {},
+          searchText: item.keyword,
+          filter: {
+            Display: true,
+          },
+          dropship: false,
+        },
+      });
+
+      const auctions = data?.auctions || [];
+
+      const mappedData = auctions.map((auction) => {
+        return {
+          url: auction.AucDetailsUrlLink,
+          image_url: auction.aucThumbnailUrl,
+          name: auction.aucTitle,
+          keyword: item.keyword, // keyword of the loop entry being searched, not a field of the API response
+          model: extractModelId(auction.AucDetailsUrlLink),
+          current_price: auction.aucCurrentBid,
+          scrap_config_id: this.scrap_config_id,
+        };
+      });
+
+      const results = this.filterItemByKeyword(item.keyword, mappedData);
 
       this.results[item.keyword] = results;
 
       console.log({ results: this.results });
     }
   };
-
-  async getPrice(url) {
-    const newPage = await browser.newPage(); // cần truyền 'url' từ bên ngoài nếu chưa có
-
-    try {
-      await newPage.goto(`${this.web_bid.origin_url}${url}`, {
-        waitUntil: "domcontentloaded", // hoặc "networkidle2" nếu cần đợi AJAX
-      });
-
-      await newPage.waitForSelector("#bidPrefix > span.font-weight-bold", {
-        timeout: 10000,
-      });
-
-      const priceText = await newPage
-        .$eval("#bidPrefix > span.font-weight-bold", (el) =>
-          el.textContent.trim()
-        )
-        .catch(() => null);
-
-      return extractNumber(priceText) || 0;
-    } catch (error) {
-      console.error(`Error getting price for ${url}:`, error);
-      return 0;
-    } finally {
-      await newPage.close();
-    }
-  }
-
-  getPriceByEl = async (elementHandle, model) => {
-    try {
-      const priceText = await elementHandle
-        .$eval(`#ps-bg-buy-btn-${model} .pds-button-label`, (el) =>
-          el.textContent.trim()
-        )
-        .catch(() => null);
-
-      return extractNumber(priceText) || 0;
-    } catch (error) {
-      return 0;
-    }
-  };
-
-  getItemsInHtml = async (data) => {
-    await this.page.waitForSelector('input[name="searchText"]', {
-      timeout: 10000,
-    });
-
-    await this.page.type('input[name="searchText"]', data.keyword);
-
-    await Promise.all([
-      this.page.click(
-        "form .btn.btn-lg.btn-primary.waves-effect.allbids-cta-bid"
-      ),
-      this.page.waitForNavigation({ waitUntil: "networkidle0" }), // hoặc 'networkidle2'
-    ]);
-
-    const elements = await this.page.$$("tbody > tr.row.ng-scope");
-
-    const results = [];
-
-    for (const el of elements) {
-      const url = await el
-        .$eval(".col-md-5.col-lg-7.title > a", (el) => el.getAttribute("href"))
-        .catch(() => null);
-
-      const model = extractModelId(url);
-
-      const image_url = await el
-        .$eval(`#list${model} > div > img`, (img) => img.getAttribute("src"))
-        .catch(() => null);
-
-      const name = await el
-        .$eval(`#list${model} > div:nth-child(1) > h3`, (el) =>
-          el.textContent.trim()
-        )
-        .catch(() => null);
-
-      const priceText = await el
-        .$eval(
-          `#list${model} > div:nth-child(1) > div:nth-child(1) > span`,
-          (el) => el.textContent.trim()
-        )
-        .catch(() => null);
-
-      results.push({
-        url,
-        image_url,
-        name,
-        keyword: data.keyword,
-        model,
-        current_price: extractNumber(priceText) || 0,
-        scrap_config_id: this.scrap_config_id,
-      });
-    }
-
-    console.log({ results });
-    return results;
-  };
 }