Example - wikipedia images downloader

This example retrieves the first 10 available images from en.wikipedia.org. The output is refreshed every 15 minutes by cron.

 

Outputs:

Source URL;Image URL;Name
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/commons/thumb/d/df/Catherine_Zeta-Jones_VF_2012_Shankbone_2.jpg/100px-Catherine_Zeta-Jones_VF_2012_Shankbone_2.jpg";"100px-Catherine_Zeta-Jones_VF_2012_Shankbone_2.jpg"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/en/thumb/e/ec/Tom_Doerr%27s_Lambda.jpg/120px-Tom_Doerr%27s_Lambda.jpg";"120px-Tom_Doerr%27s_Lambda.jpg"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/commons/thumb/5/59/Ciudad-de-M%C3%A9xico---Terremoto-Puebla-2017-3-cropped.jpg/107px-Ciudad-de-M%C3%A9xico---Terremoto-Puebla-2017-3-cropped.jpg";"107px-Ciudad-de-M%C3%A9xico---Terremoto-Puebla-2017-3-cropped.jpg"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Peking_Opera_1.JPG/120px-Peking_Opera_1.JPG";"120px-Peking_Opera_1.JPG"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/125px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg";"125px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/commons/thumb/6/6d/Carl_Van_Vechten_-_William_Faulkner.jpg/280px-Carl_Van_Vechten_-_William_Faulkner.jpg";"280px-Carl_Van_Vechten_-_William_Faulkner.jpg"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/31px-Commons-logo.svg.png";"31px-Commons-logo.svg.png"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/commons/thumb/3/3d/Mediawiki-logo.png/35px-Mediawiki-logo.png";"35px-Mediawiki-logo.png"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/commons/thumb/7/75/Wikimedia_Community_Logo.svg/35px-Wikimedia_Community_Logo.svg.png";"35px-Wikimedia_Community_Logo.svg.png"
"http://en.wikipedia.org/wiki/Main_Page";"https://upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikibooks-logo.svg/35px-Wikibooks-logo.svg.png";"35px-Wikibooks-logo.svg.png"

Source code of script:

# File: imgdownloader_main.w
# Name: Image Downloader
# Description: The script opens the defined URL, finds the first 10 available images, downloads them into the folder 'images' and saves basic information into a CSV file.
# Input: URL
# Output format: CSV file, XLS file, images
# Output fields: Source URL, Image URL, Name

#<Logger File>
#	Global
#	FileName imgdownloader_log.log
#	Level debug
#</Logger>

<Section>
	Name imgdownloader_main

	# Names of the generated output files.
	# NOTE: the two cleanup steps below repeat these names literally;
	# keep them in sync when renaming.
	Define $output_file_csv imgdownloader_output.csv
	Define $output_file_xls imgdownloader_output.xls

	# Page whose images are collected.
	Define $url http://en.wikipedia.org/wiki/Main_Page

	# Remove the CSV output of the previous run.
	# Only this script's own output file is deleted, so that unrelated
	# CSV files in the working directory are left alone.
	<Action Exec>
		cmd rm -f imgdownloader_output.csv
	</Action>

	# Download the page content; RemoveNewLine is applied to it so the
	# patterns below do not have to deal with line breaks.
	<Action ContentURL>
		URL {$url}
		RemoveNewLine
	</Action>

	# One iteration per image, limited to 10 iterations.
	<Section While>
		MaxIterations 10

		# Two alternative patterns for the image address:
		# a double-quoted src attribute, or an unquoted one.
		<Section Or>
			NoContext

			# src="..."
			<Pattern>
				RegExp <img{:re(.*?)}src="{$url_img:re([^"]*)}"
				Trim
				Compact
				MultiLine
			</Pattern>

			# src=... (unquoted, value ends at the next space)
			<Pattern>
				RegExp <img{:re(.*?)}src={$url_img:re([^ ]*)} 
				Trim
				Compact
				MultiLine
			</Pattern>
		</Section>

		# Protocol-relative address (//host/path) -> absolute address.
		# Plain string comparison is used instead of eregi(), which no
		# longer exists in PHP 7 and later.
		<Action Php>
			Code if (strncmp($context->getVariable('$url_img'), '//', 2) === 0) $context->setVariable('$url_img', 'https:'.$context->getVariable('$url_img'));
		</Action>

		# Root-relative address (/path) -> absolute address on the host of
		# the source page. Addresses already made absolute by the previous
		# step start with "https:" and are not touched here.
		<Action Php>
			Code if (strncmp($context->getVariable('$url_img'), '/', 1) === 0) $context->setVariable('$url_img', 'https://'.parse_url($context->getVariable('$url'), PHP_URL_HOST).$context->getVariable('$url_img'));
		</Action>

		# Image name: last segment of the URL path. Taking it from the
		# parsed path keeps a query string out of the file name and avoids
		# a trailing dot when the image has no extension.
		<Action Php>
			Code $context->setVariable('$name_img', basename((string) parse_url($context->getVariable('$url_img'), PHP_URL_PATH)));
		</Action>

		# Download the image into the folder 'images'.
		<Action URLToFile>
			URL {$url_img}
			FileName images/{$name_img}
		</Action>

		# Save basic information about the image into the CSV file.
		<Action SaveCSV>
			FileName {$output_file_csv}
			Separator ;
			Column $url, Source URL
			Column $url_img, Image URL
			Column $name_img, Name
		</Action>
	</Section>

	# Remove the XLS output of the previous run (only this script's own file).
	<Action Exec>
		cmd rm -f imgdownloader_output.xls
	</Action>

	# Convert the data from the CSV file into an XLS file.
	<Action ConvertToXLS>
		InputFile {$output_file_csv}
		OutputFile {$output_file_xls}
		Separator ;
	</Action>
</Section>

# Entry point: names the section defined above (imgdownloader_main);
# presumably this is the section the interpreter runs first - confirm against the tool's documentation.
Main imgdownloader_main