【问题标题】:Why is it not downloading files?为什么不下载文件?
【发布时间】:2016-10-10 00:47:34
【问题描述】:

大家好,我做了这个小程序来抓取一些 html 信息,但它没有下载一些文件......

代码如下:

using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApplication2
{
    // Console scraper: loads one hard-coded page with HtmlAgilityPack, reads its
    // Open Graph metadata, icon link and <img> sources, and prints the results.
    class Program
    {


        // Entry point. `args` is not read. Blocks on Console.ReadLine() before exiting.
        static void Main(string[] args)
        {
            using (System.Net.WebClient client = new WebClient())
            {

                // Filled further down with every non-empty <img src> value found on the page.
                List<string> source = new List<string>();

                HtmlWeb web = new HtmlWeb();
                string url = "http://www.jornaldenegocios.pt/mercados/bolsa/detalhe/wall_street_cada_vez_mais_perto_de_maximo_historico.html";
                HtmlDocument document = web.Load(url);

                var head = document.DocumentNode.SelectSingleNode("//head");
                // NOTE(review): `meta` and `link` are assigned here but never read again in this method.
                var meta = head.SelectNodes("//meta").AsEnumerable();
                var link = document.DocumentNode.SelectSingleNode("//head").SelectNodes("//link").AsEnumerable();
                // Lazily evaluated query: the src attribute of every <img>, skipping missing/empty values.
                // The values are kept exactly as written in the HTML (no resolution against `url`).
                var urls = document.DocumentNode.Descendants("img")
                        .Select(e => e.GetAttributeValue("src", null))
                        .Where(s => !String.IsNullOrEmpty(s));

                var titulo = "";
                var descricao = "";
                var linkImg = "";
                var linkIcon = "";
                var linkImgAlt = "";
                int length = 0;

                Uri myUri = new Uri(url);
                string host = myUri.Host;


                // Map of Open Graph property name -> content for every <meta> whose property contains "og:".
                // NOTE(review): ToDictionary throws on a repeated key, and the indexers assume both
                // attributes exist on every matched node — confirm against the pages being scraped.
                var fbProperties = (head.SelectNodes("//meta[contains(@property, 'og:')]") ?? Enumerable.Empty<HtmlNode>())
                    .ToDictionary(n => n.Attributes["property"].Value, n => n.Attributes["content"].Value);


                // Icon preference order: apple-touch-icon href, then any rel containing "icon",
                // then "<host>/favicon.ico" (note: that last fallback carries no scheme).
                linkIcon = (head.SelectSingleNode("//link[contains(@rel, 'apple-touch-icon')]")?.Attributes["href"]?.Value) ??
                    (head.SelectSingleNode("//link[contains(@rel, 'icon')]")?.Attributes["href"]?.Value) ??
                    host + "/favicon.ico";


                var title = head.SelectSingleNode("//title")?.InnerText;



                // Title: og:title, else the <title> text, else the host name.
                if (fbProperties.TryGetValue("og:title", out titulo) == false || titulo == null)
                {
                    titulo = (title ?? host);
                }

                // Description: og:description, else the literal "none".
                if (fbProperties.TryGetValue("og:description", out descricao) == false || descricao == null)
                {
                    descricao = ("none");
                }

                // Image: og:image, else linkImgAlt.
                // NOTE(review): linkImgAlt is still its initial "" at this point (it is only assigned
                // in the loop below), so the `?? "none"` branch cannot be reached from here.
                if (fbProperties.TryGetValue("og:image", out linkImg) == false || linkImg == null)
                {
                    linkImg = (linkImgAlt ?? "none");
                }

                // Materialize the <img src> query into the list.
                foreach (var node in urls)
                {
                    source.Add(node);
                }

                foreach (var links in source)
                {
                    // NOTE(review): `links` is the raw src value. A non-absolute value such as
                    // "/i/closePestana.png" is not a complete http URL — presumably this is what
                    // makes DownloadData throw a WebException about a local file path; verify.
                    length = client.DownloadData(links).Length;

                    // NOTE(review): the same address is downloaded a second time and compared with the
                    // size just stored from the first download, so this looks like it can only be true
                    // if the two downloads differ in size — linkImgAlt is probably never set.
                    if (length<client.DownloadData(links).Length)
                    {
                        linkImgAlt = links;
                    }

                }




                Console.WriteLine("");
                Console.WriteLine("Titulo:");
                Console.WriteLine(titulo);
                Console.WriteLine("");
                Console.WriteLine("Descriçao:");
                Console.WriteLine(descricao);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(linkImg);
                Console.WriteLine("");
                Console.WriteLine("Link do Icon:");
                Console.WriteLine(linkIcon);
                Console.WriteLine("");
                // NOTE(review): the label says "Link da Imagem:" but the value printed is `length`
                // (the byte count of the last image downloaded).
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(length);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(linkImgAlt);

                // Keep the console window open until the user presses Enter.
                Console.ReadLine();
            }
        }
    }
}

一切都很好,只是这小部分给我带来了一些问题:

// Copy every <img src> value produced by the `urls` query into the `source` list.
foreach (var node in urls)
{
    source.Add(node);
}

// Download each image and try to remember one of them in linkImgAlt.
foreach (var links in source)
{            
    // NOTE(review): `links` is the raw src attribute value; nothing in this excerpt turns a
    // relative value into an absolute http URL before it is handed to DownloadData — verify.
    length = client.DownloadData(links).Length;

    // NOTE(review): compares the size just stored with a second download of the same address,
    // so the condition presumably never holds.
    if (length<client.DownloadData(links).Length)
    {
        linkImgAlt = links;
    }
}

这是我运行程序时遇到的错误:

System.dll 中出现“System.Net.WebException”类型的未处理异常

附加信息:找不到文件“C:\i\closePestana.png”。

截图:http://i.imgur.com/C9JPjtk.png

我认为程序并没有真正下载这些文件,所以才会出现这个错误消息。你能帮我解决这个问题吗?

谢谢。

【问题讨论】:

    标签: c# html dom webclient html-agility-pack


    【解决方案1】:

    我没有得到答案...但是如果有人遇到我遇到的问题,这就是解决方案。

    using HtmlAgilityPack;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Net;
    using System.Net.Http;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace ConsoleApplication2
    {
        /// <summary>
        /// Console scraper that loads one page, reads its Open Graph metadata and icon link,
        /// and issues a HEAD request per <c>img</c> to find the one with the largest
        /// reported Content-Length, then prints everything it found.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Resolves a URL found in the page against the page's own address.
            /// </summary>
            /// <param name="baseUri">Absolute address of the page the value was read from.</param>
            /// <param name="candidate">Raw attribute value: absolute, protocol-relative ("//host/x"),
            /// root-relative ("/x") or path-relative ("x").</param>
            /// <returns>The absolute http/https URI, or <c>null</c> when the value is empty, malformed,
            /// or uses another scheme (for example <c>data:</c>).</returns>
            private static Uri ResolveUrl(Uri baseUri, string candidate)
            {
                if (string.IsNullOrWhiteSpace(candidate))
                {
                    return null;
                }

                Uri resolved;
                if (!Uri.TryCreate(baseUri, candidate.Trim(), out resolved))
                {
                    return null;
                }

                // Only http(s) resources can be probed with HttpClient.
                var isHttp = resolved.Scheme == Uri.UriSchemeHttp || resolved.Scheme == Uri.UriSchemeHttps;
                return isHttp ? resolved : null;
            }

            /// <summary>
            /// Program entry point; blocks on <see cref="Console.ReadLine"/> before exiting.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            static void Main(string[] args)
            {
                using (var client = new HttpClient())
                {
                    HtmlWeb web = new HtmlWeb();
                    string url = "http://www.jornaldenegocios.pt/mercados/bolsa/detalhe/wall_street_cada_vez_mais_perto_de_maximo_historico.html";
                    HtmlDocument document = web.Load(url);
                    Uri myUri = new Uri(url);
                    string host = myUri.Host;

                    // The XPath queries below all start with "//", so they search the whole
                    // document; falling back to the document node avoids a null dereference
                    // on pages without a <head>.
                    var head = document.DocumentNode.SelectSingleNode("//head") ?? document.DocumentNode;

                    // SelectNodes returns null (not an empty collection) when nothing matches.
                    // Every src is resolved against the page address so that relative and
                    // protocol-relative values become requestable URLs instead of being dropped
                    // or glued onto the host name.
                    List<Uri> source = (document.DocumentNode.SelectNodes("//img") ?? Enumerable.Empty<HtmlNode>())
                        .Select(e => ResolveUrl(myUri, e.GetAttributeValue("src", null)))
                        .Where(u => u != null)
                        .Distinct()
                        .ToList();

                    // Open Graph property -> content. The first occurrence wins: pages may repeat
                    // a property (several og:image tags), which ToDictionary would reject.
                    var fbProperties = new Dictionary<string, string>();
                    foreach (var node in head.SelectNodes("//meta[contains(@property, 'og:')]") ?? Enumerable.Empty<HtmlNode>())
                    {
                        var property = node.GetAttributeValue("property", null);
                        var content = node.GetAttributeValue("content", null);
                        if (property != null && content != null && !fbProperties.ContainsKey(property))
                        {
                            fbProperties.Add(property, content);
                        }
                    }

                    // Icon preference: apple-touch-icon, then any rel containing "icon",
                    // then the conventional /favicon.ico — always reported as an absolute URL.
                    var iconHref = (head.SelectSingleNode("//link[contains(@rel, 'apple-touch-icon')]")?.Attributes["href"]?.Value) ??
                        (head.SelectSingleNode("//link[contains(@rel, 'icon')]")?.Attributes["href"]?.Value) ??
                        "/favicon.ico";
                    var linkIcon = ResolveUrl(myUri, iconHref)?.AbsoluteUri ?? iconHref;

                    var title = head.SelectSingleNode("//title")?.InnerText;

                    string titulo;
                    if (!fbProperties.TryGetValue("og:title", out titulo))
                    {
                        titulo = title ?? host;
                    }

                    string descricao;
                    if (!fbProperties.TryGetValue("og:description", out descricao))
                    {
                        descricao = "none";
                    }

                    // Probe each image with a HEAD request and keep the largest reported size.
                    var linkImgAlt = "";
                    var length = 0L;

                    foreach (var imageUri in source)
                    {
                        try
                        {
                            using (var request = new HttpRequestMessage(HttpMethod.Head, imageUri))
                            using (var response = client.SendAsync(request).GetAwaiter().GetResult())
                            {
                                var fileLength = response.Content.Headers.ContentLength;

                                Console.WriteLine($"{imageUri.AbsoluteUri}: {fileLength} bytes");

                                // Error pages also carry a Content-Length; only count real hits.
                                if (response.IsSuccessStatusCode && fileLength.HasValue && fileLength.Value > length)
                                {
                                    linkImgAlt = imageUri.AbsoluteUri;
                                    length = fileLength.Value;
                                }
                            }
                        }
                        catch (Exception e) when (e is HttpRequestException || e is TaskCanceledException)
                        {
                            // Best effort: one unreachable or timed-out image must not abort the scan.
                            Console.WriteLine(e);
                        }
                    }

                    // Resolved only now, after the scan, so the fallback can actually use the
                    // largest image found instead of the initial empty string.
                    string linkImg;
                    if (!fbProperties.TryGetValue("og:image", out linkImg))
                    {
                        linkImg = string.IsNullOrEmpty(linkImgAlt) ? "none" : linkImgAlt;
                    }

                    Console.WriteLine("");
                    Console.WriteLine("Titulo:");
                    Console.WriteLine(titulo);
                    Console.WriteLine("");
                    Console.WriteLine("Descriçao:");
                    Console.WriteLine(descricao);
                    Console.WriteLine("");
                    Console.WriteLine("Link da Imagem:");
                    Console.WriteLine(linkImg);
                    Console.WriteLine("");
                    Console.WriteLine("Link do Icon:");
                    Console.WriteLine(linkIcon);
                    Console.WriteLine("");
                    // This value is a byte count, so it is no longer labelled as a link.
                    Console.WriteLine("Tamanho da Imagem (bytes):");
                    Console.WriteLine(length);
                    Console.WriteLine("");
                    Console.WriteLine("Link da Imagem (alt):");
                    Console.WriteLine(linkImgAlt);

                    // Keep the console window open until the user presses Enter.
                    Console.ReadLine();
                }
            }
        }
    }
    

    【讨论】:

      猜你喜欢
      • 2016-05-11
      • 1970-01-01
      • 1970-01-01
      • 2015-11-15
      • 2018-08-03
      • 2012-09-20
      • 2013-04-03
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多