import axios from 'axios';
import cheerio from 'cheerio';

// Browser-like User-Agent sent with every product-page request.
const USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' +
  'AppleWebKit/537.36 (KHTML, like Gecko) ' +
  'Chrome/113.0.0.0 Safari/537.36';

// Maps each extracted field to the lower-cased keyword searched for inside a
// product-detail label. Order matters: the first entry that matches wins.
const DETAIL_FIELD_KEYWORDS = [
  ['pages', 'pagine'],
  ['format', 'formato'],
  ['edition', 'edizione'],
];

/** Resolves after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Scrapes book metadata (title, subtitle, pages, format, edition) from
 * amazon.it product pages addressed as `<baseUrl><isbn>`.
 */
class AmazonBookScraper {
  constructor() {
    this.baseUrl = 'https://www.amazon.it/dp/';
  }

  /**
   * Downloads the product page for the given ISBN.
   *
   * @param {string} isbn - Identifier appended to `baseUrl`.
   * @returns {Promise<*>} The response body, or `null` when the request
   *   fails (the error message is logged, not rethrown).
   */
  async fetchPage(isbn) {
    const url = `${this.baseUrl}${isbn}`;
    try {
      const { data } = await axios.get(url, {
        headers: {
          'User-Agent': USER_AGENT,
          // add further headers here if needed
        },
      });
      return data;
    } catch (err) {
      console.error(`Errore fetching ISBN ${isbn}:`, err.message);
      return null;
    }
  }

  /**
   * Extracts book fields from the HTML of a product page.
   *
   * @param {string} html - Markup of the product page.
   * @returns {{title: ?string, subtitle: ?string, pages: ?string,
   *   format: ?string, edition: ?string}} Extracted fields; a field that
   *   is not found is `null`.
   */
  extractData(html) {
    const $ = cheerio.load(html);

    const title = $('#productTitle').text().trim() || null;
    const subtitle = $('#productSubtitle').text().trim() || null;

    const details = { pages: null, format: null, edition: null };

    // Primary layout: bullet list. A later matching bullet overwrites an
    // earlier one for the same field.
    $('#detailBullets_feature_div li').each((i, el) => {
      const item = $(el);
      const label = item.find('span.a-text-bold').text().trim().toLowerCase();
      const value = item.find('span').last().text().trim();
      const match = DETAIL_FIELD_KEYWORDS.find(([, keyword]) =>
        label.includes(keyword),
      );
      if (match) {
        details[match[0]] = value;
      }
    });

    // Fallback layout: details table. Only fills fields that are still
    // missing, so values found above are never overwritten.
    if (!details.pages || !details.format || !details.edition) {
      $('#productDetailsTable .content tr').each((i, el) => {
        const row = $(el);
        const label = row.find('th').text().trim().toLowerCase();
        const value = row.find('td').text().trim();
        const match = DETAIL_FIELD_KEYWORDS.find(
          ([field, keyword]) => !details[field] && label.includes(keyword),
        );
        if (match) {
          details[match[0]] = value;
        }
      });
    }

    return { title, subtitle, ...details };
  }

  /**
   * Fetches and parses the product page for one ISBN.
   *
   * @param {string} isbn - Identifier of the book to scrape.
   * @returns {Promise<?Object>} The result of `extractData`, or `null`
   *   when the page could not be fetched.
   */
  async scrapeISBN(isbn) {
    const html = await this.fetchPage(isbn);
    if (!html) {
      return null;
    }
    return this.extractData(html);
  }

  /**
   * Scrapes several ISBNs one after another, pausing between requests to
   * reduce the chance of being blocked.
   *
   * @param {Iterable<string>} isbnList - ISBNs to scrape, in order.
   * @param {number} [delayMs=2000] - Pause between consecutive requests.
   * @returns {Promise<Object[]>} One entry per ISBN: `{ isbn, ...fields }`.
   *   When a scrape fails the entry contains only `isbn`.
   */
  async scrapeMultiple(isbnList, delayMs = 2000) {
    const results = [];
    let isFirst = true;
    for (const isbn of isbnList) {
      // Pause only between requests: no wait before the first one and none
      // after the last one.
      if (!isFirst) {
        await sleep(delayMs);
      }
      isFirst = false;

      console.log(`Scraping ISBN: ${isbn}`);
      const data = await this.scrapeISBN(isbn);
      // Spreading `null` adds no properties, so failures yield `{ isbn }`.
      results.push({ isbn, ...data });
    }
    return results;
  }
}

/**
 * Scrapes a single book identified by `options.isbn`.
 *
 * @param {*} idapp - Not used by this function; kept for the existing
 *   call signature.
 * @param {{isbn: string}} options - Must carry the ISBN to scrape.
 * @returns {Promise<?Object>} The scraped fields, or `null` on failure.
 */
export async function ScraperDataAmazon(idapp, options) {
  const scraper = new AmazonBookScraper();
  const isbn = options.isbn;
  try {
    const data = await scraper.scrapeISBN(isbn);
    console.log(data);
    return data;
  } catch (e) {
    // No HTTP response object is available in this function, so the failure
    // is reported through the return value instead of an HTTP status.
    console.error(e);
    return null;
  }
}

/**
 * Scrapes a fixed list of ISBNs and logs the results.
 *
 * @param {*} idapp - Not used by this function; kept for the existing
 *   call signature.
 * @param {*} options - Not used by this function; kept for the existing
 *   call signature.
 * @returns {Promise<?Object[]>} The scraped entries, or `null` on failure.
 */
export async function ScraperMultipleDataAmazon(idapp, options) {
  const scraper = new AmazonBookScraper();
  // TODO(review): the ISBN list is hard-coded; put your own ISBNs here.
  const isbnList = ['8850224248'];
  try {
    const books = await scraper.scrapeMultiple(isbnList);
    console.log(books);
    return books;
  } catch (e) {
    // No HTTP response object is available in this function, so the failure
    // is reported through the return value instead of an HTTP status.
    console.error(e);
    return null;
  }
}

export default AmazonBookScraper;