Example - rss news reader

The script retrieves all RSS news items from https://www.yahoo.com/news/rss/tech and writes them to an HTML file. The output is refreshed every 15 minutes by a cron job.

 

Output:

Source code of script:

# File: rss_main.w
# Name: RSS READER
# Description: retrieves all RSS news items from https://www.yahoo.com/news/rss/tech and writes them to an HTML file
# Input: URL [https://www.yahoo.com/news/rss/tech]
# Output format: HTML file
# Output fields: linked title, description (with the HTML code)

#<Logger File>
#    Global
#    FileName rss_log.log
#    # log all messages up to debug messages
#    Level debug
#</Logger>

<Section>
    # RSS_reader: downloads the Yahoo Tech RSS feed and, for every <item>
    # found in it, writes one HTML entry (linked title followed by the
    # description) to {$output_file}.
    Name RSS_reader
    Define $output_file rss_output.html

    # clean output file: Print with FileMode Write and no Text
    # (presumably truncates the file so each run starts empty -- confirm)
    <Action Print>
        FileName {$output_file}
        FileMode Write
    </Action>

    # load RSS feed from yahoo using browser-like request headers
    # NOTE(review): RemoveNewLine presumably joins the response into a single
    # line so the patterns below can match across line breaks -- confirm.
    # NOTE(review): in libcurl an empty CURLOPT_ENCODING value requests every
    # encoding the library supports; check how that interacts with the
    # explicit "accept-encoding: deflate" header.
    <Action ContentURL>
        URL https://www.yahoo.com/news/rss/tech
        RemoveNewLine
        Header accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Header accept-encoding: deflate
        Header user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36
        Header upgrade-insecure-requests: 1
        SetOwnCurlOpt CURLOPT_ENCODING ''
    </Action>

    # identify all rss items, match the content and print it to HTML output
    <Section While>
        <Pattern>
            #pattern for matching the beginning of item in channel
            RegExp <item>	
        </Pattern>

        <Section>
            # Work on one item at a time: matching is bounded by the closing
            # </item> tag.
            # NOTE(review): "Optional" presumably lets processing continue
            # when an item lacks one of the fields below -- confirm.
            EndAt </item>
            Optional

            <Pattern>
                # item headline, captured into $rss_item_title
                RegExp <title>{$rss_item_title}</title>
                Compact
                Trim
            </Pattern>

            <Pattern>
                # item body, captured non-greedily into $rss_item_description
                RegExp <description>{$rss_item_description:re(.*?)}</description>
                Compact
                Trim
            </Pattern>

            <Pattern>
                # item URL, captured into $rss_item_link
                RegExp <link>{$rss_item_link}</link>
                Compact
                Trim
            </Pattern>

            # decode HTML entities in the description so that the markup
            # embedded in the feed is emitted as real HTML
            # NOTE(review): title, link and decoded description are written
            # to the page unescaped; the feed content is trusted here.
            <Action Php>
                Code $context->setVariable('$rss_item_description', html_entity_decode($context->getVariable('$rss_item_description')));
            </Action>

            #print matched data to file
            # Writes to the same {$output_file} that was cleaned above. The
            # previous target was prefixed with {$output_dir}, a variable that
            # is never defined in this script, so the cleaned file and the
            # written file could differ.
            <Action Print>
                FileName {$output_file}
                Text <a href="{$rss_item_link}">{$rss_item_title}</a><br><p>{$rss_item_description}</p><br><br>
            </Action>
        </Section>
    </Section>
</Section>

# Entry point: start execution with the section named RSS_reader
Main RSS_reader