Fetch and return metadata

This commit is contained in:
Basti 2018-06-29 14:39:12 -07:00
parent 936aa38ff8
commit 4c0d40bd34
7 changed files with 114 additions and 19 deletions

View File

@ -3,6 +3,7 @@ require "manifique/web_client"
module Manifique module Manifique
class Agent class Agent
def initialize(options={}) def initialize(options={})
@options = options @options = options
@ -15,7 +16,7 @@ module Manifique
def fetch_metadata def fetch_metadata
web_client = WebClient.new(url: @url) web_client = WebClient.new(url: @url)
web_client.fetch_web_manifest web_client.fetch_metadata
end end
private private
@ -27,5 +28,6 @@ module Manifique
rescue URI::InvalidURIError rescue URI::InvalidURIError
false false
end end
end end
end end

15
lib/manifique/metadata.rb Normal file
View File

@ -0,0 +1,15 @@
require 'json'
require 'ostruct'
module Manifique
  # Holds the metadata collected for a web application.
  #
  # The only piece of data stored so far is the web app manifest, which is
  # readable and writable through the +manifest+ accessor.
  class Metadata
    attr_accessor :manifest

    # Creates an empty metadata object; +manifest+ is nil until assigned.
    def initialize
      @manifest = nil
    end

    # Serializes the metadata into a JSON document of the form
    # {"manifest": ...}.
    #
    # Any arguments are forwarded to Hash#to_json. The json library passes its
    # generator state to #to_json when an object is nested inside an Array or
    # Hash, so a zero-argument definition would raise ArgumentError there.
    #
    # @return [String] the JSON representation
    def to_json(*args)
      { "manifest" => manifest }.to_json(*args)
    end
  end
end

View File

@ -1,21 +1,37 @@
require 'ostruct'
require 'faraday' require 'faraday'
require 'faraday_middleware' require 'faraday_middleware'
require "nokogiri" require "nokogiri"
require 'nitlink/response' require 'manifique/metadata'
require 'pry' require 'pry'
module Manifique module Manifique
class WebClient class WebClient
def initialize(options={}) def initialize(options={})
@options = options @options = options
@url = options[:url] @url = options[:url]
@metadata = Metadata.new
end
# Fetches the website, looks up its web app manifest and stores the result
# on the metadata object.
#
# Returns the object held in @metadata. Its +manifest+ is set to whatever
# #fetch_web_manifest returned, including a falsy value when no manifest
# was obtained.
def fetch_metadata
  fetch_website
  @metadata.manifest = fetch_web_manifest
  # TODO: assemble the metadata from HTML elements when there is no manifest
  @metadata
end
def fetch_website def fetch_website
res = do_get_request @url res = do_get_request @url
@links = parse_http_link_header(res)
@html = Nokogiri::HTML(res.body) @html = Nokogiri::HTML(res.body)
rescue
false
end end
def fetch_web_manifest def fetch_web_manifest
@ -23,7 +39,7 @@ module Manifique
unless manifest_url.match(/^https?\:\/\//) unless manifest_url.match(/^https?\:\/\//)
# Link is just the manifest path, not an absolute URL # Link is just the manifest path, not an absolute URL
manifest_url = @url + manifest_url manifest_url = [@url.gsub(/\/$/, ''), manifest_url.gsub(/^\//, '')].join('/')
end end
res = do_get_request manifest_url res = do_get_request manifest_url
@ -39,22 +55,18 @@ module Manifique
b.adapter :net_http b.adapter :net_http
end end
res = conn.get url res = conn.get url
if res.status > 400 if res.status < 400
raise "Could not fetch #{url} successfully (#{res.status})"
else
res res
else
raise "Could not fetch #{url} successfully (#{res.status})"
end end
end end
def parse_http_link_header(response)
link_parser = Nitlink::Parser.new
link_parser.parse(response)
end
def discover_web_manifest_url(html) def discover_web_manifest_url(html)
html.at_css("link[rel=manifest]").attributes["href"].value html.at_css("link[rel=manifest]").attributes["href"].value
rescue rescue
false false
end end
end end
end end

View File

@ -1,6 +1,7 @@
require "spec_helper" require "spec_helper"
RSpec.describe Manifique::Agent do RSpec.describe Manifique::Agent do
describe "options" do describe "options" do
describe "URL validation" do describe "URL validation" do
context "with invalid URL" do context "with invalid URL" do
@ -19,4 +20,9 @@ RSpec.describe Manifique::Agent do
end end
end end
end end
# Placeholder group for Agent#fetch_metadata; it contains no examples yet.
describe "#fetch_metadata" do
end
end end

View File

@ -0,0 +1,7 @@
require "spec_helper"
require "manifique/metadata"
# Spec group for Manifique::Metadata; it contains no examples yet.
RSpec.describe Manifique::Metadata do
# Earlier string-based form of the describe line, kept for reference:
# RSpec.describe "Manifique::MetaData" do
end

View File

@ -2,6 +2,7 @@ require "spec_helper"
require "manifique/web_client" require "manifique/web_client"
RSpec.describe Manifique::WebClient do RSpec.describe Manifique::WebClient do
describe "#do_get_request" do describe "#do_get_request" do
before do before do
stub_request(:get, "http://example.com/404"). stub_request(:get, "http://example.com/404").
@ -49,7 +50,29 @@ RSpec.describe Manifique::WebClient do
end end
end end
# Checks that fetching the site leaves a parsed HTML document on the client.
describe "#fetch_website" do
  let(:web_client) { Manifique::WebClient.new(url: "https://kosmos.social/") }

  before do
    # Serve the Mastodon fixture page for the site's root URL.
    fixture_path = File.join(__dir__, "..", "fixtures", "mastodon.html")
    stub_request(:get, "https://kosmos.social/").to_return(
      body: File.read(fixture_path),
      status: 200,
      headers: { "Content-Type": "text/html; charset=utf-8" }
    )

    web_client.fetch_website
  end

  it "instantiates an HTML parser object" do
    parsed_document = web_client.instance_variable_get("@html")

    expect(parsed_document).to be_kind_of(Nokogiri::HTML::Document)
  end
end
describe "#fetch_web_manifest" do describe "#fetch_web_manifest" do
let(:web_client) { Manifique::WebClient.new(url: "https://kosmos.social/") }
context "link[rel=manifest] present" do context "link[rel=manifest] present" do
before do before do
index_html = File.read(File.join(__dir__, "..", "fixtures", "mastodon.html")); index_html = File.read(File.join(__dir__, "..", "fixtures", "mastodon.html"));
@ -62,12 +85,11 @@ RSpec.describe Manifique::WebClient do
to_return(body: manifest, status: 200, headers: { to_return(body: manifest, status: 200, headers: {
"Content-Type": "application/json; charset=utf-8" "Content-Type": "application/json; charset=utf-8"
}) })
web_client.fetch_website
end end
let(:web_client) { Manifique::WebClient.new(url: "https://kosmos.social") }
subject do subject do
web_client.fetch_website
web_client.fetch_web_manifest web_client.fetch_web_manifest
end end
@ -84,12 +106,11 @@ RSpec.describe Manifique::WebClient do
to_return(body: index_html, status: 200, headers: { to_return(body: index_html, status: 200, headers: {
"Content-Type": "text/html; charset=utf-8" "Content-Type": "text/html; charset=utf-8"
}) })
web_client.fetch_website
end end
let(:web_client) { Manifique::WebClient.new(url: "https://kosmos.social") }
subject do subject do
web_client.fetch_website
web_client.fetch_web_manifest web_client.fetch_web_manifest
end end
@ -98,4 +119,35 @@ RSpec.describe Manifique::WebClient do
end end
end end
end end
# Exercises WebClient#fetch_metadata against stubbed HTTP responses.
describe "#fetch_metadata" do
  let(:web_client) { Manifique::WebClient.new(url: "https://kosmos.social/") }

  context "web app manifest present" do
    before do
      fixtures_dir = File.join(__dir__, "..", "fixtures")

      # The site's root URL serves the Mastodon fixture page ...
      stub_request(:get, "https://kosmos.social/").to_return(
        body: File.read(File.join(fixtures_dir, "mastodon.html")),
        status: 200,
        headers: { "Content-Type": "text/html; charset=utf-8" }
      )

      # ... and the manifest URL serves the JSON manifest fixture.
      stub_request(:get, "https://kosmos.social/mastodon-web-app-manifest.json").to_return(
        body: File.read(File.join(fixtures_dir, "mastodon-web-app-manifest.json")),
        status: 200,
        headers: { "Content-Type": "application/json; charset=utf-8" }
      )
    end

    subject { web_client.fetch_metadata }

    it "returns a metadata object" do
      is_expected.to be_kind_of(Manifique::Metadata)
    end

    it "stores the web app manifest data" do
      stored_manifest = subject.manifest

      expect(stored_manifest).to be_kind_of(Hash)
      expect(stored_manifest["name"]).to eq("kosmos.social")
    end
  end
end
end end

View File

@ -1,6 +1,7 @@
require "spec_helper" require "spec_helper"
RSpec.describe Manifique do RSpec.describe Manifique do
it "has a version number" do it "has a version number" do
expect(Manifique::VERSION).not_to be nil expect(Manifique::VERSION).not_to be nil
end end