Do not pass unknown encoding names to Nokogiri. (#30987)
This commit is contained in:
		
							parent
							
								
									36592d10aa
								
							
						
					
					
						commit
						2ea9336b68
					
				| @ -274,7 +274,7 @@ class LinkDetailsExtractor | |||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def detect_encoding_and_parse_document |   def detect_encoding_and_parse_document | ||||||
|     [detect_encoding, nil, @html_charset].uniq.each do |encoding| |     [detect_encoding, nil, header_encoding].uniq.each do |encoding| | ||||||
|       document = Nokogiri::HTML(@html, nil, encoding) |       document = Nokogiri::HTML(@html, nil, encoding) | ||||||
|       return document if document.to_s.valid_encoding? |       return document if document.to_s.valid_encoding? | ||||||
|     end |     end | ||||||
| @ -286,6 +286,13 @@ class LinkDetailsExtractor | |||||||
|     guess&.fetch(:confidence, 0).to_i > 60 ? guess&.fetch(:encoding, nil) : nil |     guess&.fetch(:confidence, 0).to_i > 60 ? guess&.fetch(:encoding, nil) : nil | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|  |   def header_encoding | ||||||
|  |     Encoding.find(@html_charset).name if @html_charset | ||||||
|  |   rescue ArgumentError | ||||||
|  |     # Charset name from the HTTP header is not recognized by Ruby | ||||||
|  |     nil | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|   def detector |   def detector | ||||||
|     @detector ||= CharlockHolmes::EncodingDetector.new.tap do |detector| |     @detector ||= CharlockHolmes::EncodingDetector.new.tap do |detector| | ||||||
|       detector.strip_tags = true |       detector.strip_tags = true | ||||||
|  | |||||||
							
								
								
									
										18
									
								
								spec/fixtures/requests/alternative_utf8_spelling_in_header.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								spec/fixtures/requests/alternative_utf8_spelling_in_header.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | HTTP/1.1 200 OK | ||||||
|  | server: nginx | ||||||
|  | date: Thu, 13 Jun 2024 14:33:13 GMT | ||||||
|  | content-type: text/html; charset=utf8 | ||||||
|  | content-length: 192 | ||||||
|  | accept-ranges: bytes | ||||||
|  | 
 | ||||||
|  | <!doctype html> | ||||||
|  | <html lang="en"> | ||||||
|  | <head> | ||||||
|  |   <meta charset="utf-8"> | ||||||
|  |   <title>Webserver Configs R Us</title> | ||||||
|  | </head> | ||||||
|  | <body> | ||||||
|  |   <h2>Welcome</h2> | ||||||
|  |   <p>Sneaky non-UTF character: á</p> | ||||||
|  | </body> | ||||||
|  | </html> | ||||||
| @ -32,6 +32,7 @@ RSpec.describe FetchLinkCardService do | |||||||
|     stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt')) |     stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt')) | ||||||
|     stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt')) |     stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt')) | ||||||
|     stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt')) |     stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt')) | ||||||
|  |     stub_request(:get, 'http://example.com/alternative_utf8_spelling_in_header').to_return(request_fixture('alternative_utf8_spelling_in_header.txt')) | ||||||
| 
 | 
 | ||||||
|     Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache |     Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache | ||||||
| 
 | 
 | ||||||
| @ -292,6 +293,14 @@ RSpec.describe FetchLinkCardService do | |||||||
|         expect(status.preview_card).to be_nil |         expect(status.preview_card).to be_nil | ||||||
|       end |       end | ||||||
|     end |     end | ||||||
|  | 
 | ||||||
|  |     context 'with a URL where the `Content-Type` header uses `utf8` instead of `utf-8`' do | ||||||
|  |       let(:status) { Fabricate(:status, text: 'test http://example.com/alternative_utf8_spelling_in_header') } | ||||||
|  | 
 | ||||||
|  |       it 'creates a preview card with the page title' do | ||||||
|  |         expect(status.preview_card.title).to eq 'Webserver Configs R Us' | ||||||
|  |       end | ||||||
|  |     end | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   context 'with a remote status' do |   context 'with a remote status' do | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user