# 零宽断言

### 谢益辉 / 2017-04-17

# Positive lookahead: match "bcd" only where an uppercase "E" comes right
# after it. The "E" is checked but not consumed, so the match length is 3.
gregexpr(
  pattern = "bcd(?=E)",
  text = c("abcdEfg", "abcdefg"),
  perl = TRUE
)
## [[1]]
## [1] 2
## attr(,"match.length")
## [1] 3
## attr(,"useBytes")
## [1] TRUE
##
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"useBytes")
## [1] TRUE


# Positive lookbehind: match "bc" only where an uppercase "A" comes right
# before it. The "A" is not part of the match.
gregexpr(
  pattern = "(?<=A)bc",
  text = c("Abcdefg", "abcdefg"),
  perl = TRUE
)
## [[1]]
## [1] 2
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
##
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"useBytes")
## [1] TRUE

# Negative lookahead: match "bc" only where it is NOT immediately followed by
# an uppercase "D".
gregexpr(
  pattern = "bc(?!D)",
  text = c("bcDefg", "bcdefg"),
  perl = TRUE
)
## [[1]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"useBytes")
## [1] TRUE
##
## [[2]]
## [1] 1
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE

# Negative lookbehind: match "bcd" only where it is NOT immediately preceded
# by an uppercase "A".
gregexpr(
  pattern = "(?<!A)bcd",
  text = c("Abcdefg", "abcdefg"),
  perl = TRUE
)
## [[1]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"useBytes")
## [1] TRUE
##
## [[2]]
## [1] 2
## attr(,"match.length")
## [1] 3
## attr(,"useBytes")
## [1] TRUE


# Lookbehind and lookahead combined: match a run of ASCII letters that sits
# between "Hi " and " Sir". The letters are wrapped in a capture group, so the
# result also carries capture.start / capture.length / capture.names.
gregexpr(
  pattern = "(?<=Hi )([a-zA-Z]+)(?= Sir)",
  text = c("Hi Li Sir", "Hi Li Dada"),
  perl = TRUE
)
## [[1]]
## [1] 4
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##
## [1,] 4
## attr(,"capture.length")
##
## [1,] 2
## attr(,"capture.names")
## [1] ""
##
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##
## [1,] -1
## attr(,"capture.length")
##
## [1,] -1
## attr(,"capture.names")
## [1] ""


# Upper-case the name that sits between "Hi " and " Sir", in two ways.
r = "(?<=Hi )([a-zA-Z]+)(?= Sir)"
# Keep the untouched strings in x0: x is overwritten below, and the gsub()
# example needs the original input, otherwise its substitution is a no-op on
# text that is already upper-cased.
x0 = c("Hi Li Sir", "Hi Li Dada")
x = x0
i = gregexpr(r, x, perl = TRUE)

# Approach 1: extract the matches, transform them, and write them back in
# place. The second string has no match, so it is left unchanged.
regmatches(x, i) = lapply(regmatches(x, i), toupper)
x
## [1] "Hi LI Sir"  "Hi Li Dada"


# Approach 2: a single substitution on the original strings. With
# perl = TRUE, \U in the replacement upper-cases what follows it, here the
# text of capture group 1.
gsub(r, "\\U\\1", x0, perl = TRUE)
## [1] "Hi LI Sir"  "Hi Li Dada"


1. 主要原因是中美的信封写法是反的，最怕这种两个相反的选择了。