I've long been fascinated with logically connecting together disparate sets of interconnected data on the Internet. The basic term for doing this is "mashing" or creating a "mash-up". To date most efforts in this regard have been to display various info on a map. HousingMaps.com is perhaps the best known of such sites. It scans Craigslist for rental ads and then overlays these ads onto a Google Map.
I got to wondering how one would do mashing with ASP.NET. My research ended up letting me create two useful methods:
/// <summary>
/// Downloads the entire body of a web page as text.
/// </summary>
/// <param name="pageUrl">Absolute URL of the page to fetch.</param>
/// <returns>
/// The response body, or an empty string if the URL is invalid or the
/// request fails for any reason (the failure is written to the debug output).
/// </returns>
private string GetPageContents(string pageUrl)
{
    string pageMarkup = "";
    try
    {
        Uri targetUri = new Uri(pageUrl);
        HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(targetUri);

        // Note: ContentLength is deliberately not checked before reading;
        // some sites report it as -1 even though a body is present.

        // Dispose the response as well as the reader so the underlying
        // connection is released even if reading the stream fails part-way.
        using (WebResponse webResponse = webRequest.GetResponse())
        using (Stream responseStream = webResponse.GetResponseStream())
        using (StreamReader streamReader = new StreamReader(responseStream))
        {
            pageMarkup = streamReader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Best-effort fetch: any failure (malformed URL, network error,
        // HTTP error status, non-HTTP scheme) yields an empty result.
        Debug.WriteLine("Unable to retrieve page '" + pageUrl + "' - " + ex.Message);
    }
    return pageMarkup;
}
/// <summary>
/// Finds the JPG image links on a web page.
/// </summary>
/// <remarks>
/// This is a simple text scan, not an HTML parser. For each occurrence of
/// "img src" it skips a fixed 9 characters (the length of <c>img src="</c>)
/// and takes everything up to and including the next ".jpg". Because that
/// search has no upper bound, an image that is not a JPG will produce an
/// entry that runs on into the next ".jpg" found later in the page.
/// Matching is case-insensitive, but the returned URLs keep the casing used
/// in the page, since URL paths can be case-sensitive.
/// </remarks>
/// <param name="pageUrl">Absolute URL of the page to scan.</param>
/// <returns>
/// An <see cref="ArrayList"/> of URL strings in document order; empty if the
/// page could not be fetched or contains no matches.
/// </returns>
private ArrayList GetImageLinks(string pageUrl)
{
    const string imageMarker = "img src";
    const string jpgExtension = ".jpg";
    // Distance from the start of the marker to the first URL character,
    // i.e. the length of: img src="
    const int urlOffset = 9;

    ArrayList photos = new ArrayList();
    string picPage = GetPageContents(pageUrl);
    int idx = 0;

    while (true)
    {
        idx = picPage.IndexOf(imageMarker, idx, StringComparison.OrdinalIgnoreCase);
        if (idx == -1)
        {
            break;
        }

        idx += urlOffset;
        if (idx >= picPage.Length)
        {
            // Marker sits at the very end of the markup; there is no URL to read.
            break;
        }

        int idx2 = picPage.IndexOf(jpgExtension, idx, StringComparison.OrdinalIgnoreCase);
        if (idx2 == -1)
        {
            // No ".jpg" anywhere after this point, so no later tag can match either.
            break;
        }

        photos.Add(picPage.Substring(idx, idx2 + jpgExtension.Length - idx));
    }

    return photos;
}
The first method seems to be quite solid. The second works well for me in my testing but I can't guarantee it will work in all situations.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment