Fetch and return metadata

This commit is contained in:
2018-06-29 14:39:12 -07:00
parent 936aa38ff8
commit 4c0d40bd34
7 changed files with 114 additions and 19 deletions

View File

@@ -3,6 +3,7 @@ require "manifique/web_client"
module Manifique
class Agent
def initialize(options={})
@options = options
@@ -15,7 +16,7 @@ module Manifique
# Fetches the metadata for the website at @url.
#
# Builds a WebClient for @url and delegates to WebClient#fetch_metadata.
# No separate fetch_web_manifest call is made here: its return value was
# never used, and WebClient#fetch_metadata fetches the manifest itself, so
# the extra call only repeated the same work.
#
# Returns whatever WebClient#fetch_metadata returns.
def fetch_metadata
  WebClient.new(url: @url).fetch_metadata
end
private
@@ -27,5 +28,6 @@ module Manifique
rescue URI::InvalidURIError
false
end
end
end

15
lib/manifique/metadata.rb Normal file
View File

@@ -0,0 +1,15 @@
require 'ostruct'
module Manifique
  # Plain container for the metadata gathered about a web application.
  #
  # At present it carries a single attribute, +manifest+, which callers
  # assign after construction.
  class Metadata
    # The manifest value assigned by the caller; nil until it is set.
    attr_accessor :manifest

    # Takes no arguments and performs no setup.
    def initialize; end

    # Placeholder for JSON serialization.
    #
    # Returns nil until the serialization is implemented.
    def to_json
      # TODO serialize into JSON
      nil
    end
  end
end

View File

@@ -1,21 +1,37 @@
require 'ostruct'
require 'faraday'
require 'faraday_middleware'
require "nokogiri"
require 'nitlink/response'
require 'manifique/metadata'
require 'pry'
module Manifique
class WebClient
# Prepares a client for one website.
#
# options - Hash of settings. The whole hash is kept in @options; only the
#           :url entry is read here and stored in @url.
#
# Also creates the empty Metadata object stored in @metadata.
def initialize(options={})
  @options  = options
  @url      = @options[:url]
  @metadata = Metadata.new
end
# Fetches the website and its web manifest and returns the metadata object.
#
# The result of fetch_web_manifest is stored in @metadata.manifest as-is,
# whether or not it is truthy. The return value of fetch_website is not
# checked.
#
# Returns @metadata in every case.
def fetch_metadata
  fetch_website
  @metadata.manifest = fetch_web_manifest
  # TODO when no manifest was found, assemble from HTML elements
  @metadata
end
# Requests @url and keeps the parsed results on the instance.
#
# On success, @links holds the result of parse_http_link_header for the
# response and @html holds the Nokogiri document built from the response
# body.
#
# Returns the parsed HTML document, or false when any StandardError is
# raised along the way (best effort: the error itself is discarded).
def fetch_website
  begin
    response = do_get_request(@url)
    @links = parse_http_link_header(response)
    @html = Nokogiri::HTML(response.body)
  rescue StandardError
    false
  end
end
def fetch_web_manifest
@@ -23,7 +39,7 @@ module Manifique
unless manifest_url.match(/^https?\:\/\//)
# Link is just the manifest path, not an absolute URL
manifest_url = @url + manifest_url
manifest_url = [@url.gsub(/\/$/, ''), manifest_url.gsub(/^\//, '')].join('/')
end
res = do_get_request manifest_url
@@ -39,22 +55,18 @@ module Manifique
b.adapter :net_http
end
res = conn.get url
if res.status > 400
raise "Could not fetch #{url} successfully (#{res.status})"
else
if res.status < 400
res
else
raise "Could not fetch #{url} successfully (#{res.status})"
end
end
# Runs the given HTTP response through a fresh Nitlink::Parser.
#
# response - the response object passed straight to Nitlink::Parser#parse.
#
# Returns whatever Nitlink::Parser#parse returns for that response.
def parse_http_link_header(response)
  Nitlink::Parser.new.parse(response)
end
# Looks up the manifest link in an HTML document.
#
# html - a document responding to at_css (a parsed Nokogiri document in
#        this file's usage).
#
# Returns the value of the href attribute of the first element matching
# "link[rel=manifest]", or false when the element or attribute is missing
# or any other StandardError is raised during the lookup.
def discover_web_manifest_url(html)
  begin
    manifest_link = html.at_css("link[rel=manifest]")
    href_attribute = manifest_link.attributes["href"]
    href_attribute.value
  rescue StandardError
    false
  end
end
end
end