Example - retrieves the first 5 products from each of the first 3 categories on hub.shop.ebay.com

The script retrieves the first 5 products from each of the first 3 categories on hub.shop.ebay.com and saves them to a CSV file and an XLS file. The output is refreshed every 15 minutes by cron, so it may be blank for a few seconds while a refresh is running.

 

 

Output:

Source URL;Category URL;Category Name;Expiration Date;Expiration Time;Offer Name;Current Bid;Bid Count;Image URL;Image Name;Image Alternative Text
"http://www.ebay.com/itm/Vint-1920s-30s-Chicago-Stained-Leaded-Glass-Window-SET-of-2-ready-to-hang-/332270438991?hash=item4d5cdc8a4f:g:P84AAOSw1WJZGSEf";"http://www.ebay.com/sch/Architectural-Garden/4707/i.html";"Architectural & Garden";"Jun 25, 2017";"20:16:40 PDT";"Vint 1920's 30's Chicago Stained Leaded Glass Window SET of 2 ready to hang";"";"";"https://i.ebayimg.com/images/g/P84AAOSw1WJZGSEf/s-l300.jpg";"13c446a94f.jpg";"Vint-1920-039-s-30-039-s-Chicago-Stained-Leaded-Glass-Window-SET-of-2-ready-to-hang"
"http://www.ebay.com/itm/Antique-Vintage-Brass-Pull-Ring-Handle-Knob-Project-Door-Drawer-/132120148229?hash=item1ec2f97905:g:VsYAAOSwol5YwJo3";"http://www.ebay.com/sch/Architectural-Garden/4707/i.html";"Architectural & Garden";"";"";"Antique/Vintag";"";"";"https://i.ebayimg.com/images/g/VsYAAOSwol5YwJo3/s-l300.jpg";"572fd38df7.jpg";"Antique-Vintage-Brass-Pull-Ring-Handle-Knob-Project-Door-Drawer"
"http://www.ebay.com/itm/Pair-Cast-Iron-Shelf-Brackets-Vintage-Victorian-Rustic-Antique-8-x-6-25-Pair-/302226425910?hash=item465e194836:g:a4sAAOSw4CFYpaVU";"http://www.ebay.com/sch/Architectural-Garden/4707/i.html";"Architectural & Garden";"";"";"Pair Cast Iron Shelf Brackets Vintage Victorian Rustic Antique 8 x 6.25 Pair";"";"";"https://i.ebayimg.com/images/g/a4sAAOSw4CFYpaVU/s-l300.jpg";"160cc47eac.jpg";"Pair-Cast-Iron-Shelf-Brackets-Vintage-Victorian-Rustic-Antique-8-x-6-25-Pair"
"http://www.ebay.com/itm/Antique-dresser-drawer-handles-pulls-pair-fleur-de-lis-/172731738430?hash=item28379d293e:g:LOMAAOSwRUhY~sG4";"http://www.ebay.com/sch/Architectural-Garden/4707/i.html";"Architectural & Garden";"Jun 21, 2017";"20:48:28 PDT";"Antique dresser drawer handles pulls pair fleur de lis";"";"";"https://i.ebayimg.com/images/g/LOMAAOSwRUhY~sG4/s-l300.jpg";"28da05094d.jpg";"Antique-dresser-drawer-handles-pulls-pair-fleur-de-lis"
"http://www.ebay.com/itm/Vintage-Wrought-Iron-Boot-Scraper-Garden-Mud-Shoe-Architectural-Antique-Old-/192171439602?hash=item2cbe4f75f2:g:L94AAOSw~y9ZAyxi";"http://www.ebay.com/sch/Architectural-Garden/4707/i.html";"Architectural & Garden";"";"";"Vintage Wrought Iron Boot Scraper Garden Mud Shoe Architectural Antique Old";"";"";"https://i.ebayimg.com/images/g/L94AAOSw~y9ZAyxi/s-l300.jpg";"643122b581.jpg";"Vintage-Wrought-Iron-Boot-Scraper-Garden-Mud-Shoe-Architectural-Antique-Old"

Source code of script:

# File: ebay_main.w
# Name: eBay - The World's Online Marketplace
# Description: Script retrieves the first 5 products from each of the first 3 categories
#              on hub.shop.ebay.com and saves them to a CSV file and an XLS file
# Input: URL [http://hub.shop.ebay.com/]
# Output format: CSV file, XLS file + HTML with overview of images
# Output fields: Source URL, Category URL, Category Name, Expiration Date, Expiration Time,
#                Offer Name, Current Bid, Bid Count, Image URL, Image Name, Image Alternative Text

#enable logging for debug purpose
#<Logger File>
#	Global
#	FileName ebay.log
#	Level debug
#</Logger>

#main section of script
#It prepares the output location, downloads the eBay hub page, hands each of the
#first three matched category links to ebay_productlist.w and finally converts
#the collected CSV file into an XLS file.
<Section>
	#define name of section
	Name ebay_main
	
	#names of the generated files; $output_file_csv and $output_file_images are also referenced from ebay_product.w
	Define $output_file_csv ebay_output.csv
	Define $output_file_xls ebay_output.xls
	Define $output_file_images ebay_images.html

	#define website url
	Define $main http://hub.shop.ebay.com/

	#base URL that ebay_product.w puts in front of the image file name in the HTML overview
    Define $path https://www.unitminer.com/fileadmin/scripts/ebay/img/
	
	
	
	#create directory for image storage if it does not exist
	#NOTE(review): mode 0777 requests a world-writable directory (PHP still applies the umask) - confirm this is intended
	<Action PHP>
		Code if (!file_exists("./img")) mkdir("./img", 0777);
	</Action>
		
	#delete old output CSV and HTML files and stored images
	#NOTE(review): the wildcards below remove every *.csv, *.html and img/*.jpg file the
	#command can see, not only the files named in the Define lines above - confirm the
	#script runs in a directory that holds nothing else of value
	<Action Exec>
		cmd rm -f *.csv
	</Action>
	
	<Action Exec>
		cmd rm -f img/*.jpg
	</Action>
	
	<Action Exec>
		cmd rm -f *.html
	</Action>

	
	
	#load content
	<Action ContentURL>
		#load content from the following URL saved in variable main
		URL {$main}
		#removes newlines from downloaded content for easier matching 
		RemoveNewLine
		#if a problem with a slow connection occurs, the connection attempt is automatically repeated three times with a 10-second delay between individual attempts
		#NOTE(review): LowSpeedLimit/LowSpeedTime presumably define what counts as "slow" (below 10240 bytes/s for 5 s?) - confirm against the interpreter documentation
		LowSpeedLimit 10240
		LowSpeedTime 5
		AutoRetryNoContent 10 3 
	</Action>
		
	#in this section we will iterate through the main categories found on the hub page
	<Section While>
		#set maximum of iterations (only the first 3 matched categories are processed)
		MaxIterations 3
	
		#this pattern should match category url and category name
		#it looks for links carrying class="ch"; the href value goes to $category_url and the link text to $category_name
		<Pattern>
			#defines expression which should match the data
			RegExp <a href="{$category_url:re([^"]*)}" class="ch">{$category_name}</a>
			#replace html encoded characters into their normal representation
			HtmlSpecialChars
			Trim
			Compact
		</Pattern>
		
		#evaluate script saved in file ebay_productlist.w (it loads the page at $category_url)
		<Action Eval>
			File ebay_productlist.w
		</Action>
	</Section>

	
	
	#delete old output XLS file 
	#NOTE(review): the wildcard removes every *.xls file the command can see, not only $output_file_xls
	<Action Exec>
		cmd rm -f *.xls
	</Action>
	
	#convert data from csv file into xls file
	#the separator given here is the same ";" that ebay_product.w uses when writing the CSV
	<Action ConvertToXLS>
		InputFile {$output_file_csv}
		OutputFile {$output_file_xls}
		Separator ;
	</Action>
</Section>

#run section with name "ebay_main"
Main ebay_main
# File: ebay_productlist.w

#main section of script
#It downloads one category page (URL taken from $category_url, set by ebay_main.w)
#and hands each of the first five matched product links to ebay_product.w.
<Section>
	#define name of section
	Name ebay_productlist
	#NOTE(review): Optional presumably lets the calling script continue when this section fails - confirm against the interpreter documentation
	Optional
	  
	#load content
	<Action ContentURL>
		#load content from the following URL saved in variable category_url
		URL {$category_url}
		#removes newlines from downloaded content for easier matching 
		RemoveNewLine
		#if a problem with a slow connection occurs, the connection attempt is automatically repeated three times with a 10-second delay between individual attempts
		LowSpeedLimit 10240
		LowSpeedTime 5
		AutoRetryNoContent 10 3 
	</Action>
	
	#in this section we will iterate through the products of the particular category
	<Section While>
		#set maximum of iterations (only the first 5 matched products are processed)
		MaxIterations 5
		
		#this pattern should match a product link with class="vip" that follows an <h3 tag
		#the href value goes to $product_url and the link text to $product_name
		#the name capture is [^<]* so it ends at the first "<"; a title that contains inline markup is cut at that point
		<Pattern>
			RegExp <h3{:re([^<]*)}<a href="{$product_url:re([^"]*)}"{:re([\s]*)}class="vip"{:re([^>]*)}>{$product_name:re([^<]*)}<
			Trim
			Compact
		</Pattern>
		
		#evaluate script saved in file ebay_product.w (it loads the page at $product_url)
		<Action Eval>
			File ebay_product.w
		</Action>
	</Section>	
</Section>

#run section with name "ebay_productlist"
Main ebay_productlist
# File: ebay_product.w

#main section of script
#It downloads one product page (URL taken from $product_url, set by ebay_productlist.w),
#extracts image, expiration and bid data, stores the product image and writes one
#row to the CSV output file.
<Section>
	#define name of section
	Name ebay_product
	#NOTE(review): Optional presumably lets the calling script continue when this section fails - confirm against the interpreter documentation
	Optional
	
	#load content
	<Action ContentURL>
		#load content from the following URL saved in variable product_url
		URL {$product_url}
		#removes newlines from downloaded content for easier matching 
		RemoveNewLine
		#if a problem with a slow connection occurs, the connection attempt is automatically repeated three times with a 10-second delay between individual attempts
		LowSpeedLimit 10240
		LowSpeedTime 5
		AutoRetryNoContent 10 3 
	</Action>
	
	#this pattern should match image url and image alternative text
	#the expression spells out the whole <img id="icImg" ...> tag literally, so it only matches when the page emits exactly these attributes in exactly this order
	<Pattern>
		#defines expression which should match the data
		RegExp <img id="icImg" class="img img300" itemprop="image" src="{$image_url:re([^"]*)}" style="" onload="picTimer=new Date().getTime();" clk="" alt="{$image_alternative:re([^"]*)}" />
		#replace html encoded characters into their normal representation
		HtmlSpecialChars
		Optional
		Trim
		Compact
	</Pattern>

	#define image name, which will be stored in csv file
	#the name is the first 10 characters of the md5 hash of $product_name followed by ".jpg"
	#it is derived from the product name only, so two products with the same name get the same image name
	<Action Php>
		Code $context->setVariable('$image_name',\
			substr(md5($context->getVariable('$product_name')),0,10).".jpg");
	</Action>
	
	#image handling is wrapped in its own section marked Optional
	#NOTE(review): presumably so that a failed image download does not prevent the CSV row below from being written - confirm
	<Section>
		Optional

		#load image from the image url saved in variable image_url and save image to specified file
		<Action URLToFile>
			URL {$image_url}
			FileName img/{$image_name}
		</Action>

	        #add an <img> tag for this product to the HTML overview file; $path, $output_file_images and $category_name are set by ebay_main.w
       	 <Action Print>
			FileName {$output_file_images}
			Text <img src="{$path}{$image_name}" alt="{$category_name} - {$product_name}" \
				title="{$category_name} - {$product_name}" width="60" />
	        </Action>
	</Section>

	#this pattern should match expiration date and expiration time
	#$expiration_date is taken from the first span (after the opening parenthesis) and $expiration_time from the "endedDate" span (up to the closing parenthesis)
	<Pattern>
		#defines expression which should match the data
		RegExp <span>({$expiration_date}</span>*<span class="endedDate">{$expiration_time:re([^\)]*)})</span>
		Optional
		TagsToStrip span
		Trim
		Compact
	</Pattern>
	
	#this pattern should match current bid
	<Pattern>
		#defines expression which should match the data
		RegExp <span class="notranslate" id="prcIsum_bidPrice" itemprop="price">{$current_bid}</span>
		Optional
		Trim
		Compact
	</Pattern>
	
	#this pattern should match count of bids
	<Pattern>
		#defines expression which should match the data
		RegExp <span id="qty-test">{$bid_count}</span>*<span>bids</span>
		Optional
		Trim
		Compact
	</Pattern>
	
	#print matched data to csv file
	#each Column line names the variable to write followed by the header text of that column
	#variables filled by the Optional patterns above (expiration, bid and image data) come out as empty fields in the sample output when nothing was matched
	<Action SaveCSV>
		FileName {$output_file_csv}
		Separator ;
		Column $product_url, Source URL
		Column $category_url, Category URL
		Column $category_name, Category Name
		Column $expiration_date, Expiration Date
		Column $expiration_time, Expiration Time
		Column $product_name, Offer Name
		Column $current_bid, Current Bid
		Column $bid_count, Bid Count
		Column $image_url, Image URL
		Column $image_name, Image Name
		Column $image_alternative, Image Alternative Text
	</Action>

</Section>

#run section with name "ebay_product"
Main ebay_product