123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414 |
- //
- // NSString+HTML.m
- // MWFeedParser
- //
- // Copyright (c) 2010 Michael Waterfall
- //
- // Permission is hereby granted, free of charge, to any person obtaining a copy
- // of this software and associated documentation files (the "Software"), to deal
- // in the Software without restriction, including without limitation the rights
- // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- // copies of the Software, and to permit persons to whom the Software is
- // furnished to do so, subject to the following conditions:
- //
- // 1. The above copyright notice and this permission notice shall be included
- // in all copies or substantial portions of the Software.
- //
- // 2. This Software cannot be used to archive or collect data such as (but not
- // limited to) that of events, news, experiences and activities, for the
- // purpose of any concept relating to diary/journal keeping.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- // THE SOFTWARE.
- //
- #import "NSString+HTML.h"
- #import "GTMNSString+HTML.h"
- @implementation NSString (HTML)
- #pragma mark -
- #pragma mark Class Methods
- #pragma mark -
- #pragma mark Instance Methods
/**
 * Converts the receiver from HTML to plain text.
 *
 * Tags and comments are removed, runs of whitespace collapse to a single
 * space (never at the very start or end of the result), and HTML entities are
 * decoded at the end via -stringByDecodingHTMLEntities.
 *
 * A few tags get special treatment (matching is case-sensitive):
 *   - "</p>" becomes a newline followed by a space;
 *   - a closing tag starting with "</h" becomes a newline;
 *   - "<img ...>" becomes "\n<img>SRC</img>\n", where SRC is the value of the
 *     tag's src attribute; nothing is emitted when the tag has no src;
 *   - "<title>...</title>" is dropped together with its content;
 *   - any other closing tag becomes a space, unless it is one of the inline
 *     tags listed in inlineTagNames (compared case-insensitively).
 * All other tags are dropped without a replacement.
 *
 * @return A new autoreleased plain-text string.
 */
- (NSString *)stringByConvertingHTMLToPlainText {

    // Pool
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Character sets. The unusual line breaks are Next Line (U+0085),
    // Form Feed (U+000C), Line Separator (U+2028) and Paragraph Separator (U+2029).
    NSString *whitespace = [NSString stringWithFormat:@" \t\n\r%C%C%C%C",
                            (unichar)0x0085, (unichar)0x000C, (unichar)0x2028, (unichar)0x2029];
    NSCharacterSet *newLineAndWhitespaceCharacters = [NSCharacterSet characterSetWithCharactersInString:whitespace];
    NSCharacterSet *stopCharacters = [NSCharacterSet characterSetWithCharactersInString:
                                      [@"<" stringByAppendingString:whitespace]];
    NSCharacterSet *tagNameCharacters = [NSCharacterSet characterSetWithCharactersInString:
                                         @"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"];
    NSCharacterSet *quoteCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"'"];

    // Closing tags in this set are removed without inserting a space
    NSSet *inlineTagNames = [NSSet setWithObjects:@"a", @"b", @"i", @"q", @"span", @"em", @"strong",
                             @"cite", @"abbr", @"acronym", @"label", nil];

    // Scan and find all tags
    NSMutableString *result = [[NSMutableString alloc] initWithCapacity:self.length];
    NSScanner *scanner = [[NSScanner alloc] initWithString:self];
    [scanner setCharactersToBeSkipped:nil];
    [scanner setCaseSensitive:YES];
    NSString *str = nil;
    do {

        // Scan up to the start of a tag or whitespace
        if ([scanner scanUpToCharactersFromSet:stopCharacters intoString:&str]) {
            [result appendString:str];
            str = nil; // reset
        }

        // Check if we've stopped at a tag/comment or whitespace
        if ([scanner scanString:@"<" intoString:NULL]) {

            if ([scanner scanString:@"!--" intoString:NULL]) {

                // Comment - skip it entirely
                [scanner scanUpToString:@"-->" intoString:NULL];
                [scanner scanString:@"-->" intoString:NULL];

            } else {

                // YES once the current tag's closing '>' has already been consumed.
                // In that case the generic "scan past tag" step below must be skipped,
                // otherwise it would swallow the text up to the NEXT '>' in the document.
                BOOL tagAlreadyClosed = NO;

                // The checks are mutually exclusive so that text following an already
                // consumed tag can never be mistaken for a tag name.
                if ([scanner scanString:@"/p>" intoString:NULL]) {

                    // End of paragraph
                    [result appendString:@"\n"];
                    [result appendString:@" "];
                    tagAlreadyClosed = YES;

                } else if ([scanner scanString:@"/h" intoString:NULL]) {

                    // Closing tag whose name starts with 'h' (e.g. a heading)
                    [result appendString:@"\n"];

                } else if ([scanner scanString:@"img" intoString:NULL]) {

                    // Only look for src inside this tag; an image without a src must
                    // not make the scanner run on to the end of the document.
                    NSString *attributes = nil;
                    if ([scanner scanUpToString:@">" intoString:&attributes]) {
                        NSScanner *attributeScanner = [NSScanner scannerWithString:attributes];
                        [attributeScanner setCharactersToBeSkipped:nil];
                        [attributeScanner setCaseSensitive:YES];
                        [attributeScanner scanUpToString:@"src" intoString:NULL];
                        if ([attributeScanner scanString:@"src" intoString:NULL]) {
                            [attributeScanner scanString:@"=" intoString:NULL];
                            [attributeScanner scanString:@"'" intoString:NULL];
                            [attributeScanner scanString:@"\"" intoString:NULL];
                            NSString *imageSource = nil;
                            if ([attributeScanner scanUpToCharactersFromSet:quoteCharacters intoString:&imageSource]) {
                                [result appendFormat:@"\n<img>%@</img>\n", imageSource];
                            }
                        }
                    }

                } else if ([scanner scanString:@"title" intoString:NULL]) {

                    // Drop the title element together with its content
                    [scanner scanUpToString:@"</title>" intoString:NULL];
                    if ([scanner scanString:@"</title>" intoString:NULL]) {
                        tagAlreadyClosed = YES;
                    }

                } else if ([scanner scanString:@"/" intoString:NULL]) {

                    // Closing tag - replace with space unless it's inline
                    NSString *tagName = nil;
                    BOOL dontReplaceTagWithSpace = NO;
                    if ([scanner scanCharactersFromSet:tagNameCharacters intoString:&tagName]) {
                        dontReplaceTagWithSpace = [inlineTagNames containsObject:[tagName lowercaseString]];
                    }
                    if (!dontReplaceTagWithSpace && result.length > 0 && ![scanner isAtEnd]) {
                        [result appendString:@" "];
                    }

                }

                // Scan past the remainder of the tag
                if (!tagAlreadyClosed) {
                    [scanner scanUpToString:@">" intoString:NULL];
                    [scanner scanString:@">" intoString:NULL];
                }

            }

        } else {

            // Stopped at whitespace - replace all whitespace and newlines with a space
            if ([scanner scanCharactersFromSet:newLineAndWhitespaceCharacters intoString:NULL]) {
                // Don't append a space to the beginning or end of the result
                if (result.length > 0 && ![scanner isAtEnd]) [result appendString:@" "];
            }

        }

    } while (![scanner isAtEnd]);

    // Cleanup
    [scanner release];

    // Decode HTML entities; retain so the string survives the pool drain
    NSString *retString = [[result stringByDecodingHTMLEntities] retain];
    [result release];

    // Drain
    [pool drain];

    // Return
    return [retString autorelease];

}
// Decode all HTML entities by delegating to the GTM NSString category.
// Always returns a string object distinct from the GTM result.
- (NSString *)stringByDecodingHTMLEntities {
    NSString *unescaped = [self gtm_stringByUnescapingFromHTML];
    // The GTM helper may hand back the receiver itself, so wrap it in a new string
    return [NSString stringWithString:unescaped];
}
// Encode all HTML entities by delegating to the GTM NSString category
// (ASCII-safe escaping). Always returns a string object distinct from the GTM result.
- (NSString *)stringByEncodingHTMLEntities {
    NSString *escaped = [self gtm_stringByEscapingForAsciiHTML];
    // NOTE(review): presumably the GTM helper can return the receiver itself when
    // nothing needs escaping - confirm against GTM; a new string is created either way.
    return [NSString stringWithString:escaped];
}
/**
 * Replaces every line break in the receiver with a "<br />" tag.
 *
 * Recognised line breaks are \n, \r, Next Line (U+0085), Form Feed (U+000C),
 * Line Separator (U+2028) and Paragraph Separator (U+2029). Each break yields
 * one "<br />"; a "\r\n" pair counts as a single break wherever it occurs.
 *
 * @return A new autoreleased string with the line breaks replaced.
 */
- (NSString *)stringWithNewLinesAsBRs {

    // Pool
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Scanner
    NSCharacterSet *newLineCharacters = [NSCharacterSet characterSetWithCharactersInString:
                                         [NSString stringWithFormat:@"\n\r%C%C%C%C",
                                          (unichar)0x0085, (unichar)0x000C, (unichar)0x2028, (unichar)0x2029]];
    NSScanner *scanner = [[NSScanner alloc] initWithString:self];
    [scanner setCharactersToBeSkipped:nil];
    NSMutableString *result = [[NSMutableString alloc] initWithCapacity:self.length];

    // Scan
    while (![scanner isAtEnd]) {

        // Copy the text up to the next line break unchanged
        NSString *text = nil;
        if ([scanner scanUpToCharactersFromSet:newLineCharacters intoString:&text]) {
            [result appendString:text];
        }

        // Take the whole run of line breaks and emit one <br /> per break
        NSString *lineBreaks = nil;
        if ([scanner scanCharactersFromSet:newLineCharacters intoString:&lineBreaks]) {
            NSUInteger runLength = lineBreaks.length;
            for (NSUInteger i = 0; i < runLength; i++) {
                [result appendString:@"<br />"];
                // Combine \r\n into just 1 <br />, even in the middle of a run
                if ([lineBreaks characterAtIndex:i] == '\r' &&
                    i + 1 < runLength &&
                    [lineBreaks characterAtIndex:i + 1] == '\n') {
                    i++;
                }
            }
        }

    }

    // Cleanup; the immutable copy is owned here so it survives the pool drain
    [scanner release];
    NSString *retString = [result copy];
    [result release];

    // Drain
    [pool drain];

    // Return
    return [retString autorelease];

}
/**
 * Collapses whitespace and line breaks in the receiver.
 *
 * Every run of spaces, tabs, \n, \r, Next Line (U+0085), Form Feed (U+000C),
 * Line Separator (U+2028) or Paragraph Separator (U+2029) is replaced by a
 * single space. Runs at the very start or end of the string are removed.
 *
 * @return A new autoreleased string.
 */
- (NSString *)stringByRemovingNewLinesAndWhitespace {

    NSString *collapsed = nil;

    @autoreleasepool {

        NSString *separatorCharacters = [NSString stringWithFormat:@" \t\n\r%C%C%C%C", 0x0085, 0x000C, 0x2028, 0x2029];
        NSCharacterSet *separators = [NSCharacterSet characterSetWithCharactersInString:separatorCharacters];

        NSMutableString *buffer = [[NSMutableString alloc] init];
        NSScanner *reader = [[NSScanner alloc] initWithString:self];
        [reader setCharactersToBeSkipped:nil];

        for (;;) {
            if ([reader isAtEnd]) {
                break;
            }

            // Keep the next stretch of ordinary characters as it is
            NSString *word = nil;
            [reader scanUpToCharactersFromSet:separators intoString:&word];
            if (word != nil) {
                [buffer appendString:word];
            }

            // Swallow the separator run that follows
            BOOL skippedSeparators = [reader scanCharactersFromSet:separators intoString:NULL];
            if (!skippedSeparators) {
                continue;
            }

            // A space is only inserted between two pieces of text,
            // never at the beginning or end of the result
            if (buffer.length > 0 && ![reader isAtEnd]) {
                [buffer appendString:@" "];
            }
        }

        [reader release];

        // Owned reference, so the string outlives the pool
        collapsed = [[NSString alloc] initWithString:buffer];
        [buffer release];

    }

    return [collapsed autorelease];

}
/**
 * Strips HTML tags from the receiver.
 *
 * DEPRECATED - please use -stringByConvertingHTMLToPlainText instead.
 *
 * Every distinct "<...>" sequence found in the receiver is removed. It is
 * replaced by a space, except for the bare inline tags <a>, <span>, <strong>,
 * <em> and their closing forms, which are replaced by nothing. The result is
 * then passed through -stringByRemovingNewLinesAndWhitespace.
 *
 * When the receiver contains no '<' at all, an unmodified copy is returned
 * (whitespace is not normalised in that case).
 *
 * @return A new autoreleased string.
 */
- (NSString *)stringByStrippingTags {

    // Short-cut if there are no tags. This is checked before the pool is
    // created so the early return cannot leave an undrained pool behind.
    NSUInteger firstTagIndex = [self rangeOfString:@"<" options:NSLiteralSearch].location;
    if (firstTagIndex == NSNotFound) {
        return [NSString stringWithString:self]; // return copy of string as no tags found
    }

    // Pool
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Scan and find all tags
    NSScanner *scanner = [NSScanner scannerWithString:self];
    [scanner setCharactersToBeSkipped:nil];
    NSMutableSet *tags = [[NSMutableSet alloc] init];
    NSString *tag;
    do {

        // Scan up to <, then collect everything up to (not including) >
        tag = nil;
        [scanner scanUpToString:@"<" intoString:NULL];
        [scanner scanUpToString:@">" intoString:&tag];

        // Add to set, with the closing > put back
        if (tag) {
            NSString *t = [[NSString alloc] initWithFormat:@"%@>", tag];
            [tags addObject:t];
            [t release];
        }

    } while (![scanner isAtEnd]);

    // Strings
    NSMutableString *result = [[NSMutableString alloc] initWithString:self];
    NSString *finalString;

    // Replace tags
    NSString *replacement;
    for (NSString *t in tags) {

        // Replace tag with space unless it's an inline element
        replacement = @" ";
        if ([t isEqualToString:@"<a>"] ||
            [t isEqualToString:@"</a>"] ||
            [t isEqualToString:@"<span>"] ||
            [t isEqualToString:@"</span>"] ||
            [t isEqualToString:@"<strong>"] ||
            [t isEqualToString:@"</strong>"] ||
            [t isEqualToString:@"<em>"] ||
            [t isEqualToString:@"</em>"]) {
            replacement = @"";
        }

        // Replace every occurrence of this tag
        [result replaceOccurrencesOfString:t
                                withString:replacement
                                   options:NSLiteralSearch
                                     range:NSMakeRange(0, result.length)];
    }

    // Remove multi-spaces and line breaks; retain so it survives the pool drain
    finalString = [[result stringByRemovingNewLinesAndWhitespace] retain];

    // Cleanup
    [result release];
    [tags release];

    // Drain
    [pool drain];

    // Return
    return [finalString autorelease];

}
/**
 Rewrites the <img> tags of an HTML string so that every image is rendered
 at a fixed width.

 The entities &quot;, &lt; and &gt; are decoded first (a bare "&quot" without
 a semicolon becomes a single quote). Every <img ...> tag whose text contains
 an "http" URL is then replaced by a tag of the form
 <img src="URL" title="" alt="N" width="WIDTH" height="auto">, where N is a
 timestamp-derived number; all other attributes of the original tag are
 discarded. <img> tags without an "http" URL are left untouched.

 @param width The width written into each rewritten <img> tag.
 @return The rewritten HTML, or an empty string when the receiver is empty.
 */
- (NSString *)htmlWebAutoImageSizeWidth:(CGFloat)width {
    if (self.length == 0) {
        return @"";
    }

    // Decode the entities. "&quot;" must be handled before the bare "&quot",
    // otherwise it would be turned into "';" and never become a double quote.
    NSString *content = [self stringByReplacingOccurrencesOfString:@"&quot;" withString:@"\""];
    content = [content stringByReplacingOccurrencesOfString:@"&lt;" withString:@"<"];
    content = [content stringByReplacingOccurrencesOfString:@"&gt;" withString:@">"];
    content = [content stringByReplacingOccurrencesOfString:@"&quot" withString:@"'"];

    // Find every <img ...> tag
    NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:@"<(img|IMG)[^\\<\\>]*>"
                                                                           options:NSRegularExpressionCaseInsensitive
                                                                             error:NULL];
    if (regex == nil) {
        return content;
    }
    NSArray *matches = [regex matchesInString:content
                                      options:0
                                        range:NSMakeRange(0, content.length)];

    // Characters that end the URL: a quote if there is one, otherwise (unquoted
    // attribute value) whitespace or the end of the tag.
    NSCharacterSet *quoteCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"'"];
    NSCharacterSet *unquotedTerminators = [NSCharacterSet characterSetWithCharactersInString:@" \t\r\n>"];

    NSMutableString *html = [NSMutableString stringWithString:content];
    NSTimeInterval timestamp = [NSDate timeIntervalSinceReferenceDate];

    // Walk the matches back to front so that the ranges of the tags still to be
    // processed stay valid while earlier replacements change the string length.
    for (NSUInteger index = matches.count; index-- > 0; ) {
        NSTextCheckingResult *match = matches[index];
        NSString *tag = [content substringWithRange:match.range];

        // Tags without an http URL cannot be rewritten - leave them as they are
        NSRange httpRange = [tag rangeOfString:@"http"];
        if (httpRange.location == NSNotFound) {
            continue;
        }

        NSString *fromURL = [tag substringFromIndex:httpRange.location];
        NSRange endRange = [fromURL rangeOfCharacterFromSet:quoteCharacters];
        if (endRange.location == NSNotFound) {
            endRange = [fromURL rangeOfCharacterFromSet:unquotedTerminators];
        }
        if (endRange.location == NSNotFound) {
            continue;
        }
        NSString *url = [fromURL substringToIndex:endRange.location];

        // The alt value is an integer, so the timestamp is converted explicitly
        // instead of passing a double to %lld.
        NSString *replacement = [NSString stringWithFormat:@"<img src=\"%@\" title=\"\" alt=\"%lld\" width=\"%f\" height=\"auto\">",
                                 url, (long long)(timestamp + index), (double)width];
        [html replaceCharactersInRange:match.range withString:replacement];
    }

    return [NSString stringWithString:html];
}
- @end
|