Better hashtag normalization when processing a post (#26614)
This commit is contained in:
		
							parent
							
								
									bd023a2637
								
							
						
					
					
						commit
						58acaa9ae6
					
				| @ -105,6 +105,21 @@ describe('computeHashtagBarForStatus', () => { | ||||
|     ); | ||||
|   }); | ||||
| 
 | ||||
|   it('handles server-side normalized tags with accentuated characters', () => { | ||||
|     const status = createStatus( | ||||
|       '<p>Text</p><p><a href="test">#éaa</a> <a href="test">#Éaa</a></p>', | ||||
|       ['eaa'], // The server may normalize the hashtags in the `tags` attribute
 | ||||
|     ); | ||||
| 
 | ||||
|     const { hashtagsInBar, statusContentProps } = | ||||
|       computeHashtagBarForStatus(status); | ||||
| 
 | ||||
|     expect(hashtagsInBar).toEqual(['Éaa']); | ||||
|     expect(statusContentProps.statusContent).toMatchInlineSnapshot( | ||||
|       `"<p>Text</p>"`, | ||||
|     ); | ||||
|   }); | ||||
| 
 | ||||
|   it('does not display in bar a hashtag in content with a case difference', () => { | ||||
|     const status = createStatus( | ||||
|       '<p>Text <a href="test">#Éaa</a></p><p><a href="test">#éaa</a></p>', | ||||
|  | ||||
| @ -23,8 +23,9 @@ export type StatusLike = Record<{ | ||||
| }>; | ||||
| 
 | ||||
| function normalizeHashtag(hashtag: string) { | ||||
|   if (hashtag && hashtag.startsWith('#')) return hashtag.slice(1); | ||||
|   else return hashtag; | ||||
|   return ( | ||||
|     hashtag && hashtag.startsWith('#') ? hashtag.slice(1) : hashtag | ||||
|   ).normalize('NFKC'); | ||||
| } | ||||
| 
 | ||||
| function isNodeLinkHashtag(element: Node): element is HTMLLinkElement { | ||||
| @ -70,9 +71,16 @@ function uniqueHashtagsWithCaseHandling(hashtags: string[]) { | ||||
| } | ||||
| 
 | ||||
| // Create the collator once, this is much more efficient
 | ||||
| const collator = new Intl.Collator(undefined, { sensitivity: 'accent' }); | ||||
| const collator = new Intl.Collator(undefined, { | ||||
|   sensitivity: 'base', // we use this to emulate the ASCII folding done on the server-side, hopefuly more efficiently
 | ||||
| }); | ||||
| 
 | ||||
| function localeAwareInclude(collection: string[], value: string) { | ||||
|   return collection.find((item) => collator.compare(item, value) === 0); | ||||
|   const normalizedValue = value.normalize('NFKC'); | ||||
| 
 | ||||
|   return !!collection.find( | ||||
|     (item) => collator.compare(item.normalize('NFKC'), normalizedValue) === 0, | ||||
|   ); | ||||
| } | ||||
| 
 | ||||
| // We use an intermediate function here to make it easier to test
 | ||||
| @ -121,11 +129,13 @@ export function computeHashtagBarForStatus(status: StatusLike): { | ||||
|   // try to see if the last line is only hashtags
 | ||||
|   let onlyHashtags = true; | ||||
| 
 | ||||
|   const normalizedTagNames = tagNames.map((tag) => tag.normalize('NFKC')); | ||||
| 
 | ||||
|   Array.from(lastChild.childNodes).forEach((node) => { | ||||
|     if (isNodeLinkHashtag(node) && node.textContent) { | ||||
|       const normalized = normalizeHashtag(node.textContent); | ||||
| 
 | ||||
|       if (!localeAwareInclude(tagNames, normalized)) { | ||||
|       if (!localeAwareInclude(normalizedTagNames, normalized)) { | ||||
|         // stop here, this is not a real hashtag, so consider it as text
 | ||||
|         onlyHashtags = false; | ||||
|         return; | ||||
| @ -140,12 +150,14 @@ export function computeHashtagBarForStatus(status: StatusLike): { | ||||
|     } | ||||
|   }); | ||||
| 
 | ||||
|   const hashtagsInBar = tagNames.filter( | ||||
|     (tag) => | ||||
|       // the tag does not appear at all in the status content, it is an out-of-band tag
 | ||||
|       !localeAwareInclude(contentHashtags, tag) && | ||||
|       !localeAwareInclude(lastLineHashtags, tag), | ||||
|   ); | ||||
|   const hashtagsInBar = tagNames.filter((tag) => { | ||||
|     const normalizedTag = tag.normalize('NFKC'); | ||||
|     // the tag does not appear at all in the status content, it is an out-of-band tag
 | ||||
|     return ( | ||||
|       !localeAwareInclude(contentHashtags, normalizedTag) && | ||||
|       !localeAwareInclude(lastLineHashtags, normalizedTag) | ||||
|     ); | ||||
|   }); | ||||
| 
 | ||||
|   const isOnlyOneLine = contentWithoutLastLine.content.childElementCount === 0; | ||||
|   const hasMedia = status.get('media_attachments').size > 0; | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user