// go_service/repositories/scraper_lcw.go
//
// 297 lines
// 7.2 KiB
// Go

package repositories
import (
"context"
"db_service/models"
helper "db_service/pkg"
"fmt"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/chromedp/cdproto/dom"
"github.com/chromedp/chromedp"
)
// LCW_URL is the base address of the LC Waikiki storefront. Relative product
// links are appended to it, and it is stripped from share links to obtain a
// product's URL key.
const LCW_URL = "https://www.lcwaikiki.com"
// LCWScraper scrapes a single LC Waikiki product page.
type LCWScraper struct {
// link is the URL of the product page that gets loaded and parsed.
link string
}
// NewLCWScraper builds a scraper bound to the given product page link.
func NewLCWScraper(link string) LCWScraper {
	var scraper LCWScraper
	scraper.link = link
	return scraper
}
// InitProductDetailParsing renders the scraper's product page, parses it and
// returns the resulting product, including its color variants.
//
// The page is loaded through loadPage and handed to goquery. The parsed
// product is tagged with the "lcw" vendor. On failure the returned product
// carries only the vendor, and the error says which step failed while
// wrapping the underlying cause.
func (s LCWScraper) InitProductDetailParsing() (models.Product, error) {
	const vendor = "lcw"

	// Load the page with JavaScript executed.
	html, err := loadPage(s.link)
	if err != nil {
		return models.Product{Vendor: vendor}, fmt.Errorf("loading page %q: %w", s.link, err)
	}

	// Convert the rendered HTML into a goquery document.
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return models.Product{Vendor: vendor}, fmt.Errorf("parsing page %q: %w", s.link, err)
	}

	product := parseDocument(doc, true)
	// parseDocument builds a fresh product that has no vendor, so the vendor
	// must be applied after parsing; a value assigned beforehand is lost.
	product.Vendor = vendor
	return product, nil
}
// loadPage opens url in a chromedp-driven browser context and returns the
// outer HTML of the resulting document as a string.
//
// NOTE(review): the previous comment said chromedp waits until JS rendering
// is complete; the only wait visible here is whatever chromedp.Navigate
// performs — confirm that is sufficient for the target pages.
func loadPage(url string) (string, error) {
	browserCtx, stop := chromedp.NewContext(context.Background())
	defer stop()

	var html string
	// captureHTML reads the document root and stores its outer HTML in html.
	captureHTML := chromedp.ActionFunc(func(ctx context.Context) error {
		root, err := dom.GetDocument().Do(ctx)
		if err != nil {
			return err
		}
		html, err = dom.GetOuterHTML().WithNodeID(root.NodeID).Do(ctx)
		return err
	})

	err := chromedp.Run(browserCtx, chromedp.Navigate(url), captureHTML)
	return html, err
}
// parseDocument builds a models.Product from a rendered product page.
//
// Identity, naming and price fields are read from the page's <meta> tags, the
// URL key from the ".share-link" element, and the remaining details from the
// setup* helpers. Color variants are collected only when primaryModel is true.
func parseDocument(doc *goquery.Document, primaryModel bool) models.Product {
	colors := []models.Product{}
	sizes := []models.Variant{}
	result := new(models.Product)
	result.ColorVariants = &colors
	result.SizeVariants = &sizes

	// applyMeta copies one <meta name=... content=...> pair onto the product.
	// The Size and SizeId meta tags are currently ignored.
	applyMeta := func(name, content string) {
		switch name {
		case "ModelId":
			result.ProductGroupID = content
		case "OptionId":
			result.Sku = "p-" + content
			result.ProductNumber = "lcw-" + content
		case "ProductCode":
			result.ProductCode = content
		case "ProductName":
			result.Name = content
		case "BrandName":
			result.Brand = content
		case "Gender":
			result.Cinsiyet = content
		case "description":
			result.Description = content
		case "Color":
			result.Color = content
		case "DiscountPrice_1":
			result.Price.SellingPrice.Text = ""
			result.Price.SellingPrice.Value = convertStrToFloat(content)
		case "CashPrice_1":
			result.Price.OriginalPrice.Value = convertStrToFloat(content)
		}
	}

	doc.Find("meta").Each(func(_ int, meta *goquery.Selection) {
		name, _ := meta.Attr("name")
		content, _ := meta.Attr("content")
		applyMeta(name, content)
	})

	// The URL key is the share link with the storefront base address removed.
	if shareLink, found := doc.Find(".share-link").Attr("value"); found {
		withoutBase := strings.ReplaceAll(shareLink, LCW_URL, " ")
		result.URLKey = strings.TrimSpace(withoutBase)
	}

	// Prices go first: the size variants copy the product's prices.
	setupPrices(result, doc)
	setupSizeVariants(result, doc)
	setupSizeImages(result, doc)

	// Only the primary model follows the links to its color variants.
	if primaryModel {
		setupColorVariants(result, doc)
	}

	setupProductProperties(result, doc)
	return *result
}
// setupPrices reads the basket-discount price from the page and, when it
// converts to a non-zero amount, stores it as the product's discounted price.
func setupPrices(product *models.Product, doc *goquery.Document) {
	const basketSelector = ".col-xs-12 .price-area > .campaign-discount-detail > .basket-discount"

	rawPrice := doc.Find(basketSelector).Text()
	amount := convertStrToFloat(strings.Trim(rawPrice, " \n"))

	// A zero amount means no basket discount was found or it was unparsable.
	if amount == 0.0 {
		return
	}
	product.Price.DiscountedPrice.Value = amount
}
// setupSizeVariants appends one size variant to product.SizeVariants for
// every child of ".option-size" whose data-stock is greater than three.
//
// Each variant copies the product's current prices, so the prices must be set
// on product before this is called. Variants that
// helper.IsLCWSizeVariantsAdded reports as already present are not added
// again.
func setupSizeVariants(product *models.Product, doc *goquery.Document) {
	// ProductNumber is stored with this vendor prefix. It has to be removed
	// before the number can be combined with the size key and parsed as an
	// integer; with the prefix in place the conversion can never succeed.
	const vendorPrefix = "lcw-"
	// Sizes with this many items or fewer are not offered.
	const minStock = 3

	productDigits := strings.TrimPrefix(product.ProductNumber, vendorPrefix)

	doc.Find(".option-size").Children().Each(func(_ int, option *goquery.Selection) {
		dataStock, _ := option.Attr("data-stock")
		// A missing or non-numeric data-stock is treated as out of stock.
		stock, err := strconv.Atoi(dataStock)
		if err != nil || stock <= minStock {
			return
		}

		sizeValue, _ := option.Attr("size")
		key, _ := option.Attr("key")

		// NOTE(review): assumes ItemNumber is meant to be the numeric option
		// id followed by the size key — confirm against consumers.
		itemNumber, err := strconv.Atoi(productDigits + key)
		if err != nil {
			// Best effort: report the problem and keep the variant with a
			// zero ItemNumber rather than dropping it.
			fmt.Printf("error in generating keyInt for ItemNumber: %v\n", err)
			itemNumber = 0
		}

		sizeVariant := models.Variant{
			AttributeName:  "Beden",
			AttributeValue: sizeValue,
			Stock:          dataStock,
			Sellable:       true,
			ItemNumber:     itemNumber,
			Price: models.Price{
				DiscountedPrice: models.PriceValue{
					Text:  "",
					Value: product.Price.DiscountedPrice.Value,
				},
				SellingPrice: models.PriceValue{
					Text:  "",
					Value: product.Price.SellingPrice.Value,
				},
				OriginalPrice: models.PriceValue{
					Text:  "",
					Value: product.Price.OriginalPrice.Value,
				},
			},
		}

		if helper.IsLCWSizeVariantsAdded(*product.SizeVariants, sizeVariant) {
			return
		}
		*product.SizeVariants = append(*product.SizeVariants, sizeVariant)
	})
}
// setupColorVariants loads and parses the page behind every ".color-option"
// link and stores the resulting products as the product's color variants.
//
// Options without an href, or whose href contains "javascript:", are skipped,
// as are options whose page fails to load or parse. ColorVariantCount is set
// to the number of variants collected by this call.
func setupColorVariants(product *models.Product, doc *goquery.Document) {
	variants := []models.Product{}

	doc.Find(".color-option").Each(func(_ int, option *goquery.Selection) {
		href, hasHref := option.Attr("href")
		if !hasHref || strings.Contains(href, "javascript:") {
			return
		}

		page, err := loadPage(LCW_URL + href)
		if err != nil {
			return
		}

		variantDoc, err := goquery.NewDocumentFromReader(strings.NewReader(page))
		if err != nil {
			fmt.Println("err: ", err)
			return
		}

		// Variants are parsed as non-primary so they do not recurse into
		// their own color options.
		variants = append(variants, parseDocument(variantDoc, false))
	})

	product.ColorVariantCount = len(variants)
	*product.ColorVariants = append(*product.ColorVariants, variants...)
}
// setupSizeImages appends the "smallimages" attribute of every <img> element
// that has one to product.Images, in document order.
func setupSizeImages(product *models.Product, doc *goquery.Document) {
	imgs := doc.Find("img")
	imgs.Each(func(_ int, img *goquery.Selection) {
		src, found := img.Attr("smallimages")
		if !found {
			return
		}
		product.Images = append(product.Images, src)
	})
}
// setupProductProperties fills product.Descriptions and product.Attributes
// from the "#collapseOne" section of the page.
//
// Every non-blank <li> becomes a non-bold description. Every <p> inside an
// ".option-info" element that has the form "key: value" becomes a single-entry
// attribute map keyed by the lower-cased key. Paragraphs without a ":"
// separator, or with an empty key or value, are skipped.
func setupProductProperties(product *models.Product, doc *goquery.Document) {
	details := doc.Find("#collapseOne")

	// Descriptions: one entry per non-blank list item.
	details.Find("li").Each(func(_ int, item *goquery.Selection) {
		text := strings.TrimSpace(item.Text())
		if text == "" {
			return
		}
		product.Descriptions = append(product.Descriptions, models.DescriptionModel{
			Description: text,
			Bold:        false,
		})
	})

	// Attributes: "key: value" paragraphs.
	details.Find(".option-info").Each(func(_ int, info *goquery.Selection) {
		info.Find("p").Each(func(_ int, p *goquery.Selection) {
			parts := strings.Split(p.Text(), ":")
			// Without a separator there is no value to read; indexing
			// parts[1] here would panic, so such paragraphs (including
			// blank ones) are skipped.
			if len(parts) < 2 {
				return
			}

			// As before, only the text between the first and second ":" is
			// used as the value.
			key := strings.TrimSpace(parts[0])
			value := strings.TrimSpace(parts[1])
			if key == "" || value == "" {
				return
			}

			product.Attributes = append(product.Attributes, map[string]string{
				strings.ToLower(key): value,
			})
		})
	})
}
// convertStrToFloat turns a Turkish-formatted price such as "1.234,56 TL"
// into a float64. It returns 0 when the cleaned text is not a valid number.
func convertStrToFloat(price string) float64 {
	// Applied strictly in order: drop the currency suffix, drop thousands
	// dots, turn the decimal comma into a dot, then drop remaining spaces.
	steps := [...][2]string{
		{" TL", ""},
		{".", ""},
		{",", "."},
		{" ", ""},
	}

	cleaned := price
	for _, step := range steps {
		cleaned = strings.ReplaceAll(cleaned, step[0], step[1])
	}

	if value, err := strconv.ParseFloat(cleaned, 64); err == nil {
		return value
	}
	return 0
}