Create a dynamic script to scrape multiple websites. -- 2
$250-750 USD
Kiszállításkor fizetve
Create a dynamic script in Node.js to scrape 15+ sites, driven by a configurable JSON file.
Requirement:
-Should be able to scrape lazy loading sites
-Download images
-Go through all pagination pages and scrape the child pages
-If element is a product link "click" that link and scrape that child page
-Retrieve all of the elements listed below correctly
See the example below to understand how it should be set up.
//Example configurable object
[
site1: {
url: "[login to view URL]",
parent: {
productList: "div.product-list",
product-link: "a.product-url",
product-name: ".product-info .product-title",
product-image: ".product-images img[src attribute]",
product-price: {
list-price: ".product-price span.old-price",
sale-price: ".product-price span.new-price"
},
pagination: "button.load-more-btn",
product-page: {
name: "form#product h1",
price: {
list-price: ".price .old",
sale-price: ".price .new"
},
colors: "#options-articles li",
sizes: "#options-variants li",
description: ".description p",
images: {
main: "#images .product-image-box img[src attribute]",
thumbs: "#images .thumbs .product-thumbs li"
}
}
...
}
},
site2: {
Similar to above Site1
...
}
...
]
==================================================================================
//Product Class
Class Product() {
Product(name, link, image, price, colors, sizes, description) {
this.name = name;
this.link = link;
this.image = image;
this.price = price;
this.colors = colors;
this.sizes = sizes;
this.description = description;
}
}
===================================================================================
//Main Scraper Class
Class Scraper() {
Scraper(site) {
this.site = site;
}
getName(attr) {
return name value;
}
getLink(attr) {
return link value;
}
getMainImage(attr) {
return main value;
}
getThumbImages(attr) {
return thumb array;
}
getPrice(attr) {
return Price Object(list, sale);
}
getColor(attr) {
return Colors array;
}
getSizes(attr) {
return Sizes array;
}
getDescription(attr) {
return desc value;
}
hasPagination() {
if(site.parent.pagination element exists)
//scrape all pagination pages to the last page
}
downloadImage() {
if(site.parent.product-image element exists)
//download image
src = getMainImage();
download(src);
}
getProductInfo(product) {
//Go to product page and scrape using "product-page" attribute from json sample.
return {
name: String,
price: {
old: String,
new: String
},
images : {
main: String src,
thumbs: [array]
},
sizes: [array],
colors: [array],
description: String
}
}
getListOfProducts() {
//Use site.parent.productList to get listOfProducts
ArrayOfProducts = [];
for( items in listOfProducts ){
//create new product
product = new Product( getProductInfo() );
//add it to list
ArrayOfProducts.push( product );
}
}
}
Projektazonosító: #15688261