Getting the HTML code of a web page can be useful when converting a web page to PDF in a certain context or state, for example, when you are already authenticated in an ASP.NET application and you want to convert a web page which is accessible only if you are authenticated, or if you want to convert an ASP.NET web page after some values were filled in a form. In these situations a possible solution is to get the HTML code being sent to the browser and convert it to PDF, optionally providing a base URL used to resolve images, CSS and script files.
This section presents three practical methods of getting the HTML code of a web page: using the HttpWebRequest class, overriding the Render method of ASP.NET pages, and calling the Server.Execute method from ASP.NET.
The System.Net.HttpWebRequest class can be used to retrieve the HTML code of a web page. HTTP cookies and headers, authentication credentials, proxy and other options can be set before accessing the web page. Below is a simple example of getting the HTML code of a web page and converting it to PDF.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Http;
using System.Net;
using System.IO;
using System.Text;
using HiQPdf;
namespace HiQPdf_Demo.Controllers
{
    /// <summary>
    /// Demo controller that downloads the HTML code of a web page with
    /// <see cref="HttpWebRequest"/> and converts that HTML code to a PDF document.
    /// </summary>
    public class GetWebPageHtmlController : Controller
    {
        /// <summary>
        /// Returns the default view of this controller.
        /// </summary>
        public IActionResult Index()
        {
            return View();
        }

        /// <summary>
        /// Retrieves the HTML code of the page whose URL was posted in the
        /// <c>textBoxUrl</c> form field and converts it to PDF, using the same URL
        /// as the base URL for resolving images, CSS and script files.
        /// </summary>
        /// <param name="collection">The posted form; the <c>textBoxUrl</c> field is read from it.</param>
        /// <returns>
        /// A file result with content type <c>application/pdf</c> and download name
        /// <c>HtmlToPdf.pdf</c>, or a 400 Bad Request result when the posted URL is
        /// missing or is not an absolute HTTP/HTTPS URL.
        /// </returns>
        /// <remarks>
        /// Failures while contacting the remote server are not caught here and
        /// propagate to the caller.
        /// </remarks>
        [HttpPost]
        public ActionResult ConvertToPdf(IFormCollection collection)
        {
            // the URL of the web page from where to retrieve the HTML code
            string url = collection["textBoxUrl"];

            // Reject missing, malformed or non-HTTP(S) URLs up front: the request
            // below is cast to HttpWebRequest, which is only valid for these schemes.
            Uri pageUri;
            if (string.IsNullOrWhiteSpace(url)
                || !Uri.TryCreate(url, UriKind.Absolute, out pageUri)
                || (pageUri.Scheme != Uri.UriSchemeHttp && pageUri.Scheme != Uri.UriSchemeHttps))
            {
                return BadRequest("A valid absolute HTTP or HTTPS URL is required.");
            }

            // create the HTTP request
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUri);

            // Set credentials to use for this request
            // NOTE(review): the default credentials are attached to a request whose
            // target comes from user input - confirm this is acceptable outside a demo.
            request.Credentials = CredentialCache.DefaultCredentials;

            // get the HTML code of the web page; the using statements release the
            // response, its stream and the reader even if reading fails part-way
            string htmlCode;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader readStream = new StreamReader(receiveStream, Encoding.UTF8))
            {
                htmlCode = readStream.ReadToEnd();
            }

            // create the HTML to PDF converter
            HtmlToPdf htmlToPdfConverter = new HtmlToPdf();

            // the base URL used to resolve images, CSS and script files
            string baseUrl = url;

            // convert HTML code to a PDF memory buffer
            byte[] pdfBuffer = htmlToPdfConverter.ConvertHtmlToMemory(htmlCode, baseUrl);

            FileResult fileResult = new FileContentResult(pdfBuffer, "application/pdf");
            fileResult.FileDownloadName = "HtmlToPdf.pdf";

            return fileResult;
        }
    }
}