.net工具类——HTML处理
作者:互联网
#region 截取字符长度 /// <summary> /// 截取字符长度 /// </summary> /// <param name="inputString">字符</param> /// <param name="len">长度</param> /// <returns></returns> public static string SubString(string inputString, int len) { if (string.IsNullOrEmpty(inputString)) return ""; inputString = DropHTML(inputString); ASCIIEncoding ascii = new ASCIIEncoding(); int tempLen = 0; string tempString = ""; byte[] s = ascii.GetBytes(inputString); for (int i = 0; i < s.Length; i++) { if ((int)s[i] == 63) { tempLen += 2; } else { tempLen += 1; } try { tempString += inputString.Substring(i, 1); } catch { break; } if (tempLen > len) break; } //如果截过则加上半个省略号 byte[] mybyte = System.Text.Encoding.Default.GetBytes(inputString); if (mybyte.Length > len) tempString += "…"; return tempString; } #endregion #region 清除HTML标记 public static string DropHTML(string Htmlstring) { if (string.IsNullOrEmpty(Htmlstring)) return ""; //删除脚本 Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase); //删除HTML Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase); Htmlstring.Replace("<", ""); Htmlstring.Replace(">", ""); Htmlstring.Replace("\r\n", ""); Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim(); return Htmlstring; } #endregion #region 清除HTML标记且返回相应的长度 public static string DropHTML(string Htmlstring, int strLen) { return SubString(DropHTML(Htmlstring), strLen); } #endregion #region TXT代码转换成HTML格式 /// <summary> /// 字符串字符处理 /// </summary> /// <param name="chr">等待处理的字符串</param> /// <returns>处理后的字符串</returns> /// //把TXT代码转换成HTML格式 public static String ToHtml(string Input) { StringBuilder sb = new StringBuilder(Input); sb.Replace(" ", " "); //sb.Replace("&", "&"); sb.Replace("<", "<"); sb.Replace(">", ">"); sb.Replace("\r\n", "<br />"); sb.Replace("\n", "<br />"); sb.Replace("\t", " "); return sb.ToString(); } #endregion #region HTML代码转换成TXT格式 /// <summary> /// 字符串字符处理 /// </summary> /// <param name="chr">等待处理的字符串</param> /// <returns>处理后的字符串</returns> /// //把HTML代码转换成TXT格式 public static String ToTxt(String Input) { StringBuilder sb = new StringBuilder(Input); sb.Replace(" ", " "); sb.Replace("<br>", "\r\n"); sb.Replace("<br>", "\n"); sb.Replace("<br />", "\n"); sb.Replace("<br />", "\r\n"); sb.Replace("<", "<"); sb.Replace(">", ">"); sb.Replace("&", "&"); return sb.ToString(); } #endregion
标签:Regex,Htmlstring,Replace,IgnoreCase,HTML,RegexOptions,sb,net,工具 来源: https://www.cnblogs.com/amusement1992/p/13496235.html