-----------------------------------------------------------------------------
-- Little program that checks links in HTML files, using coroutines and
-- non-blocking I/O via the dispatcher module.
-- LuaSocket sample files
-- Author: Diego Nehab
-----------------------------------------------------------------------------
local url = require("socket.url")
local dispatch = require("dispatch")
local http = require("socket.http")

-- timeout setting read by the dispatch module
-- NOTE(review): presumably seconds -- confirm against the dispatch module
dispatch.TIMEOUT = 10

-- make sure the user knows how to invoke us
arg = arg or {}
if #arg < 1 then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- there is no standard global named `exit`; os.exit is the library call
    -- that terminates the script. A non-zero status flags the usage error.
    os.exit(1)
end

-- '-n' means we are running in non-blocking mode
-- NOTE(review): handler is intentionally left as a global here; the
-- functions further down refer to it by that name.
if arg[1] == "-n" then
    -- if non-blocking I/O was requested, use real dispatcher interface
    table.remove(arg, 1) -- drop the flag so only addresses remain in arg
    handler = dispatch.newhandler("coroutine")
else
    -- if using blocking I/O, use fake dispatcher interface
    handler = dispatch.newhandler("sequential")
end

-- number of status checks started but not yet finished
local nthreads = 0
-- Start a status check for `link` through the dispatcher handler.
-- Links whose parsed scheme is not "http" are skipped without any output.
-- For http links a HEAD request is issued using the handler's tcp
-- constructor, and one tab-indented line is written to standard output:
-- the link alone when the request succeeds with code 200, otherwise the
-- link followed by ": " and the second value returned by http.request.
function getstatus(link)
    local target = url.parse(link, {scheme = "file"})
    if target.scheme ~= "http" then
        return
    end
    -- count this check as pending; the count is decremented once it reports
    nthreads = nthreads + 1
    local function probe()
        local ok, code = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        local succeeded = ok and code == 200
        if succeeded then
            io.write('\t', link, '\n')
        else
            io.write('\t', link, ': ', tostring(code), '\n')
        end
        nthreads = nthreads - 1
    end
    handler:start(probe)
end
-- Read the whole file named by a URL-escaped path.
-- `path` is passed through url.unescape before being opened for reading.
-- Returns the file contents as a string, or nil plus the message reported
-- by io.open when the file cannot be opened.
function readfile(path)
    local handle, reason = io.open(url.unescape(path), "r")
    if not handle then
        return nil, reason
    end
    local contents = handle:read("*a")
    handle:close()
    return contents
end
-- Fetch the document addressed by `u`.
-- The address is parsed with "file" as the default scheme:
--   * "http": the document is requested with http.request;
--   * "file": the parsed path is read with readfile;
--   * anything else is reported as an unhandled scheme.
-- Returns three values: the base address for resolving links, the document
-- body (nil on failure) and an error value (nil on success).
-- NOTE(review): defined as a global, so this replaces Lua's built-in `load`
-- for the rest of the program.
function load(u)
    local info = url.parse(u, { scheme = "file" })
    local scheme = info.scheme
    if scheme == "http" then
        local body, code, headers = http.request(u)
        local base = u
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        -- without a body, the second result of http.request is the error
        local err
        if not body then
            err = code
        end
        return base, body, err
    elseif scheme == "file" then
        local body, err = readfile(info.path)
        return u, body, err
    end
    return u, nil, string.format("unhandled scheme '%s'", scheme)
end
-- Collect the targets of the HREF attributes found in an HTML document.
-- body: HTML text to scan
-- base: address that every extracted href is resolved against with
--       url.absolute
-- Returns an array of absolute links. Links are grouped by quoting style
-- (double-quoted first, then single-quoted, then unquoted), not by their
-- order in the document. HTML comments are stripped first, so links inside
-- comments are ignored.
function getlinks(body, base)
    local links = {}
    -- Record one href and return "" so that gsub deletes the matched text.
    -- A callback that returns nothing makes gsub keep the match, which would
    -- let the unquoted pattern below pick up every quoted href a second time
    -- (with the quote characters included).
    local function collect(href)
        links[#links + 1] = url.absolute(base, href)
        return ""
    end
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    -- extract links: double-quoted values, then single-quoted values
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- unquoted values end at whitespace or '>', so attributes that follow
    -- the href are not swallowed into the link
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>\"']+)", collect)
    return links
end
-- Load the document at `address` and start a status check for every link
-- extracted from it. When the document cannot be loaded, the error value
-- returned by load is printed and nothing else happens.
function checklinks(address)
    local base, body, err = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(err)
    end
end
-- check every address given on the command line; each one is first made
-- absolute relative to "file:"
for i = 1, #arg do
    local address = url.absolute("file:", arg[i])
    checklinks(address)
end
-- keep stepping the handler until no status check is pending
while nthreads > 0 do
    handler:step()
end