From d1a12fb08d95dab250eca01c04dc2a1d1163c4f5 Mon Sep 17 00:00:00 2001
From: Ronald
Roadside Stations: 6 to 10 (Health Risk: Moderate to Very High) - aqRoadRegex = /Roadside Stations: (\d*)( to (\d*))?/; // General Stations: 4 to 7 (Health Risk: Moderate to High)
Roadside Stations: 6 to 10 (Health Risk: Moderate to Very High) + aqDateRegex = /HKSAR Air Quality Health Index at : (.* \+0800)/,// e.g. "HKSAR Air Quality Health Index at : Sun, 15 Feb 2015 16:30:00 +0800 Current Condition" + aqGeneralRegex = /General Stations: (\d*)( to (\d*))?/, // "General Stations: 4 to 7 (Health Risk: Moderate to High)
Roadside Stations: 6 to 10 (Health Risk: Moderate to Very High)" + aqRoadRegex = /Roadside Stations: (\d*)( to (\d*))?/, // "General Stations: 4 to 7 (Health Risk: Moderate to High)
Roadside Stations: 6 to 10 (Health Risk: Moderate to Very High)" + conditionRegex = /[^\/]+(\d\d)\..*$/; // "http://www.weather.gov.hk/images/wxicon/pic77.png" return bluebird.all([ // Current Weather from HKO _getXmlFeed(currentWeatherFeedUrl).then(function ($) { var weatherStr = $('description p').text(), + iconSrc = $('description img').first().attr("src"), degreesMatch = degreesRegex.exec(weatherStr), humidityMatch = humidityRegex.exec(weatherStr), uvIndexMatch = uvIndexRegex.exec(weatherStr), - uvIntensityMatch = uvIntensityRegex.exec(weatherStr); + uvIntensityMatch = uvIntensityRegex.exec(weatherStr), + conditionMatch = conditionRegex.exec(iconSrc); //console.log(degreesMatch); weather.degrees_c = (degreesMatch && degreesMatch.length > 1)? parseInt(degreesMatch[1]) : null; weather.humidity_pct = (humidityMatch && humidityMatch.length > 1)? parseInt(humidityMatch[1]) : null; weather.uv_index = (uvIndexMatch && uvIndexMatch.length > 1)? parseFloat(uvIndexMatch[1]) : null; weather.uv_intensity = (uvIntensityMatch && uvIntensityMatch.length > 1)? uvIntensityMatch[1].trim() : null; + if ((conditionMatch && conditionMatch.length > 1)){ + weather.weather_condition.number = parseInt(conditionMatch[1]); + weather.weather_condition.caption = (weatherConditionsMap[conditionMatch[1]])? weatherConditionsMap[conditionMatch[1]].caption : null; + weather.weather_condition.icon_url = iconSrc || null; + } }).catch(function(err){ console.error("Error parsing Current Weather data!",err, err.stack.toString()); }), - // Get Weather condition from openweathermap.org (icon mapping and condition name/description -- since there's no reliable way to scrape this data from HKO's feed) - // TODO: Parse HKO's homepage to get relevant weather condition instead of using openweathermap.org - _getJsonFeed(openweatherJsonFeedUrl).then(function (openWeatherData) { - var condition; - if (! (openWeatherData && openWeatherData.weather && openWeatherData.weather instanceof Array && openWeatherData.weather.length > 0)) throw new Error("Failed to get weather data from openweathermap.org"); - condition = openWeatherData.weather.pop(); - weather.weather_condition.id = condition.id; - weather.weather_condition.name = condition.main; - weather.weather_condition.description = condition.description; - weather.weather_condition.icon = condition.icon; - }).catch(function(err){ - console.error("Error parsing openweathermap.org data!",err, err.stack.toString()); - }), // Current Warning from HKO _getXmlFeed(currentWarningFeedUrl).then(function ($) { - var warningMatch = warningRegex.exec($('item title').text()); + var warningMatch = warningRegex.exec($('item title').text()), + iconSrc = $('description img').first().attr("src"); weather.weather_warning.text = (warningMatch && warningMatch.length > 1)? warningMatch[1].trim() : null; weather.weather_warning.date = (warningMatch && warningMatch.length >= 5)? new Date( @@ -123,6 +136,7 @@ parseInt(warningMatch[2]), // hour parseInt(warningMatch[3])) // minute : null; + weather.weather_warning.icon_url = iconSrc; }).catch(function(err){ console.error("Error parsing Weather Warning data!",err, err.stack.toString()); }), diff --git a/package.json b/package.json index 20d9e2d..63fe89a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "hko-scraper", - "version": "0.0.2", + "version": "0.0.3", "description": "Hong Kong Observatory and Air Quality data scraper", "main": "index.js", "scripts": {