Do not pass unknown encoding names to nokogiri. (#30987)
This commit is contained in:
		
							parent
							
								
									36592d10aa
								
							
						
					
					
						commit
						2ea9336b68
					
				| @ -274,7 +274,7 @@ class LinkDetailsExtractor | ||||
|   end | ||||
| 
 | ||||
|   def detect_encoding_and_parse_document | ||||
|     [detect_encoding, nil, @html_charset].uniq.each do |encoding| | ||||
|     [detect_encoding, nil, header_encoding].uniq.each do |encoding| | ||||
|       document = Nokogiri::HTML(@html, nil, encoding) | ||||
|       return document if document.to_s.valid_encoding? | ||||
|     end | ||||
| @ -286,6 +286,13 @@ class LinkDetailsExtractor | ||||
|     guess&.fetch(:confidence, 0).to_i > 60 ? guess&.fetch(:encoding, nil) : nil | ||||
|   end | ||||
| 
 | ||||
|   def header_encoding | ||||
|     Encoding.find(@html_charset).name if @html_charset | ||||
|   rescue ArgumentError | ||||
|     # Encoding from HTTP header is not recognized by Ruby | ||||
|     nil | ||||
|   end | ||||
| 
 | ||||
|   def detector | ||||
|     @detector ||= CharlockHolmes::EncodingDetector.new.tap do |detector| | ||||
|       detector.strip_tags = true | ||||
|  | ||||
							
								
								
									
										18
									
								
								spec/fixtures/requests/alternative_utf8_spelling_in_header.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								spec/fixtures/requests/alternative_utf8_spelling_in_header.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| HTTP/1.1 200 OK | ||||
| server: nginx | ||||
| date: Thu, 13 Jun 2024 14:33:13 GMT | ||||
| content-type: text/html; charset=utf8 | ||||
| content-length: 192 | ||||
| accept-ranges: bytes | ||||
| 
 | ||||
| <!doctype html> | ||||
| <html lang="en"> | ||||
| <head> | ||||
|   <meta charset="utf-8"> | ||||
|   <title>Webserver Configs R Us</title> | ||||
| </head> | ||||
| <body> | ||||
|   <h2>Welcome</h2> | ||||
|   <p>Sneaky non-UTF character: á</p> | ||||
| </body> | ||||
| </html> | ||||
| @ -32,6 +32,7 @@ RSpec.describe FetchLinkCardService do | ||||
|     stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt')) | ||||
|     stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt')) | ||||
|     stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt')) | ||||
|     stub_request(:get, 'http://example.com/alternative_utf8_spelling_in_header').to_return(request_fixture('alternative_utf8_spelling_in_header.txt')) | ||||
| 
 | ||||
|     Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache | ||||
| 
 | ||||
| @ -292,6 +293,14 @@ RSpec.describe FetchLinkCardService do | ||||
|         expect(status.preview_card).to be_nil | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'with a URL where the `Content-Type` header uses `utf8` instead of `utf-8`' do | ||||
|       let(:status) { Fabricate(:status, text: 'test http://example.com/alternative_utf8_spelling_in_header') } | ||||
| 
 | ||||
|       it 'creates a preview card with the page title' do | ||||
|         expect(status.preview_card.title).to eq 'Webserver Configs R Us' | ||||
|       end | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   context 'with a remote status' do | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user