R语言中str_extract_all函数

更新时间:2023-07-23 14:00:56 阅读: 评论:0

R语言中str_extract_all函数
这个函数是stringr包中的一个函数,在做数据清洗的时候很有用。它的大致用法是:按照自己指定的规则(正则表达式),从一个字符串中提取出所有匹配的内容。具体用法如下:
# Minimal demonstration of str_extract_all(): it returns a list with one
# character vector per input string, holding every match of the pattern.
# Printed results are shown as "#>" comments so the snippet can be run as-is.
library(stringr)

x <- "abacdef12g"

# Character class only: each match is a single "f" or digit.
str_extract_all(x, "[f0-9]")
#> [[1]]
#> [1] "f" "1" "2"

# Up to three consecutive characters from the class form one (greedy) match.
str_extract_all(x, "[f0-9]{1,3}")
#> [[1]]
#> [1] "f12"

# Up to two per match: the run "f12" is split into "f1" and "2".
str_extract_all(x, "[f0-9]{1,2}")
#> [[1]]
#> [1] "f1" "2"
附上一些平时写的代码:
library(ggplot2)
library(RMySQL)
library(stringr)
library(sqldf)
library(plyr)
# Load the raw page-visit records from the `tracker` database into
# `rawdata_vi` (one row per visit, seven columns as listed in the SELECT).
# NOTE(review): credentials are hard-coded here; consider reading them from
# environment variables (Sys.getenv) instead of committing them.
# NOTE(review): identifiers such as `inrt_time`, `ssionid` and `hba_visit`
# look truncated (possibly insert_time / sessionid / hbase_visit) — confirm
# against the real schema before running.
conn <- dbConnect(
  MySQL(),
  dbname = "tracker",
  username = "gaoyang922",
  password = "gaoyang922@123456!",
  host = "10.10.109.62",
  port = 1333
)
dbSendQuery(conn, 'SET NAMES utf8')
query <- dbSendQuery(conn, "SELECT key_table,left(inrt_time,8) as
inrt_date,label,ssionid,stay_time,site,page_url FROM tracker.hba_visit
where inrt_time is not null and page_url like '%' ")
# n = -1 fetches every remaining row of the result set.
rawdata_vi <- fetch(query, n = -1)
# Release the result set before closing the connection.
dbClearResult(query)
dbDisconnect(conn)

# Quick sanity checks on what was loaded.
dim(rawdata_vi)
head(rawdata_vi)
nrow(rawdata_vi)

# Earlier vectorised attempt, kept for reference. Presumably abandoned because
# the flattened match vector does not line up one-to-one with the rows.
# rawdata_vi$prodID=as.numeric(unlist(str_extract_all(rawdata_vi$page_url,"[0-9]{1,2}")))
# Extract a product ID from a single page URL.
#
# x: one URL string.
# Returns: when `x` contains "productId", the first run of one or two digits
#   found in `x`, converted to numeric (NA if `x` has no digits at all);
#   otherwise the sentinel 9999, which callers filter out afterwards.
# NOTE(review): the digits are taken from anywhere in the URL, not
# specifically from the text following "productId", and at most two digits are
# kept — confirm that this matches the real URL layout and ID range.
f <- function(x) {
  if (grepl("productId", x)) {
    result <- as.numeric(unlist(str_extract_all(x, "[0-9]{1,2}"))[1])
  } else {
    result <- 9999
  }
  result
}
# Derive a product ID for every visited URL. vapply() pins the result to a
# plain numeric vector (sapply() would return a list for zero rows and would
# carry the URLs along as names).
rawdata_vi$prodID <- vapply(
  rawdata_vi$page_url, f, numeric(1), USE.NAMES = FALSE
)
# 9999 is the sentinel f() returns for URLs without "productId"; drop those.
rawdata_vi_prod <- subset(rawdata_vi, prodID != 9999)
head(rawdata_vi_prod)
dim(rawdata_vi_prod)
#### Above: the product ID of every URL has been derived.
#### Below: fetch the attributes of every product.
# Load the product master data, joined with its business-category name, from
# the `pms` database into `rawdata_pd`.
# NOTE(review): credentials are hard-coded here; consider reading them from
# environment variables (Sys.getenv) instead of committing them.
conn <- dbConnect(
  MySQL(),
  dbname = "pms",
  username = "gaoyang922",
  password = "gaoyang922@123456!",
  host = "10.10.109.62",
  port = 1333
)
dbSendQuery(conn, 'SET NAMES utf8')
query <- dbSendQuery(conn, "select a.*,b.BizCategoryName from
pms.pms_product a left join pms.pms_biz_category b
on a.ProductType = b.ID ")
# n = -1 fetches every remaining row of the result set.
rawdata_pd <- fetch(query, n = -1)
# Release the result set before closing the connection.
dbClearResult(query)
dbDisconnect(conn)
##
## Daily summary
# Build the daily page-view ranking per product.
# Keep only the visit date and the product ID (columns 2 and 8 of
# rawdata_vi_prod), selected by name so a column reshuffle cannot break it.
prod_sumy1 <- rawdata_vi_prod[, c("inrt_date", "prodID")]
# plyr::count() adds a `freq` column holding the number of rows per group.
prod_sumy2 <- count(prod_sumy1, c("inrt_date", "prodID"))
prod_sumy3 <- count(prod_sumy1, "prodID")
head(prod_sumy2)

# Attach product attributes; all.x = TRUE keeps products that have no match
# in rawdata_pd.
# NOTE(review): columns 1, 4, 5 and 31 of rawdata_pd are picked by position
# and must include "ID" — verify against the pms_product schema.
prod1 <- merge(
  prod_sumy2,
  rawdata_pd[, c(1, 4, 5, 31)],
  by.x = "prodID",
  by.y = "ID",
  all.x = TRUE
)
head(prod1)

# Newest date first, then most-viewed product first within each date.
daily_prod_rank <- prod1[with(prod1, order(-as.numeric(inrt_date), -freq)), ]
names(daily_prod_rank)[c(1, 2, 3)] <- c("ProdId", "InrtDate", "PVCnt")
head(daily_prod_rank)
daily_prod_rank$InrtDate <- as.character(daily_prod_rank$InrtDate)
### Import into the database
# NOTE(review): credentials are hard-coded here; consider reading them from
# environment variables (Sys.getenv) instead of committing them.
# NOTE(review): the table receives prod_sumy2 (the raw per-day counts), not
# daily_prod_rank — presumably intentional given the "_raw" suffix; confirm.
conn <- dbConnect(
  MySQL(),
  dbname = "analy",
  username = "gaoyang922",
  password = "gaoyang922@123456!",
  host = "10.10.109.62",
  port = 1333
)
dbWriteTable(conn, "daily_prod_rank_raw", prod_sumy2)
# Close this connection: `conn` is reassigned right afterwards, which would
# otherwise leave it open with no handle to close it.
dbDisconnect(conn)
### Product relation types: 1 = follow (关注), 2 = purchase (购买),
### 3 = reservation (预约), 4 = like (点赞)
# Count user-product relations per product and relation type within the fixed
# date window given in the query, and load the result into `rawdata_RT`.
# NOTE(review): credentials are hard-coded here; consider reading them from
# environment variables (Sys.getenv) instead of committing them.
# NOTE(review): unlike the other connections in this script, no
# 'SET NAMES utf8' is issued here although the query returns Chinese labels —
# confirm whether that is intentional.
conn <- dbConnect(
  MySQL(),
  dbname = "pms",
  username = "gaoyang922",
  password = "gaoyang922@123456!",
  host = "10.10.109.62",
  port = 1333
)
query <- dbSendQuery(conn, "SELECT productID,RelationType,
(case when RelationType=1 then '关注'
when  RelationType=2 then '购买'
when RelationType=4 then '点赞'
else '预约' end
)as RT_desc
,count(*) as pd_cnt FROM pms.pms_ur_relation
where CreateTime between '2015-08-18' and '2015-09-05'
group by ProductId,RelationType ")
# n = -1 fetches every remaining row of the result set.
rawdata_RT <- fetch(query, n = -1)
# Release the result set before closing the connection.
dbClearResult(query)
dbDisconnect(conn)
head(rawdata_RT)
# Visit counts per product ID (9999 = URL without a product ID).
table(rawdata_vi$prodID)

# Turn prodID into a factor whose levels are ordered by how many visits each
# ID has, so the bars appear in ascending order of frequency.
prodID_ggplot <- rawdata_vi$prodID
prodID_ggplot <- reorder(prodID_ggplot, prodID_ggplot, length)
rawdata_vi$prodID_ggplot <- prodID_ggplot

# Bar chart of visits per product, excluding the 9999 sentinel rows.
ggplot(subset(rawdata_vi, prodID != 9999), aes(x = prodID_ggplot)) +
  geom_bar()
#
# library(ggplot2)
# library(RMySQL)
# library(stringr)
# library(dplyr)
# conn <- dbConnect(MySQL(), dbname = "tracker", urname="zhoumeixu204", password="zhoumeixu204@123456!",host="10.10.109.62",port=1333) # query<-dbSendQuery(conn, "SELECT key_table,left(inrt_time,8) as
#                    inrt_date,label,ssionid,stay_time,site,page_url FROM tracker.hba_visit
#                    where inrt_time is not null  ")
#
#
# query_1<-dbSendQuery(conn,"
#                      lect * from  tracker.hba_visitor  where  inrt_time
#                      is not  NULL  and  city is not NUll  and country='china'")
# rawdata_vi <- fetch(query,n=-1)
# hba_visitor<-fetch(query_1,n=-1)
# dbDisconnect(conn)
# head(rawdata_vi)
# f<-function(x){
#  if(grepl("productId",x)){
#    result<-as.numeric(unlist(str_extract_all(x,"[0-9]{1,2}"))[1])
#
#  }
#  el{
#    result<-9999
#  }
#  result
# }
# rawdata_vi$prodID =sapply(rawdata_vi$page_url,f)
# # rawdata_vi<-subt(rawdata_vi,prodID!=9999)
# table(prodID)
#
#
# prodID_ggplot<-rawdata_vi$prodID;prodID_ggplot<-reorder(prodID_ggplot,prodID_ggplot,length)
# rawdata_vi$prodID_ggplot<-prodID_ggplot
# site_ggplot<-rawdata_vi$site;site_ggplot<-reorder(site_ggplot,site_ggplot,length)
# rawdata_vi$site_ggplot<-site_ggplot
# ggplot(subt(rawdata_vi,prodID!=9999),aes(x=prodID_ggplot))+geom_bar(aes(fill=prodID_ggplot))
# ggplot(subset(rawdata_vi,prodID!=9999),aes(x=prodID_ggplot,fill=factor(inrt_date)))+geom_bar(position = 'stack')+labs(title="移动终端占比柱形图")
# ggplot(rawdata_vi,aes(x=site_ggplot,fill=factor(inrt_date)))+geom_bar(position = 'dodge')+labs(title="移动终端占⽐柱形图")
# ggplot(rawdata_vi,aes(x=site_ggplot,fill=factor(inrt_date)))+geom_bar(position = 'stack')+labs(title="移动终端占比柱形图")
# ggplot(rawdata_vi,aes(x=site_ggplot,fill=factor(inrt_date)))+geom_bar(position = 'dodge')+labs(title="移动终端占⽐柱形图")+facet_wrap(~inrt_date,ncol=1) # str(rawdata_vi)
# rawdata_vi_to_mysql<-data.frame(rawdata_vi$key_table,rawdata_vi$inrt_date,rawdata_vi$label,rawdata_vi$ssionid,rawdata_vi$stay_time,rawdata_vi$site # conn <- dbConnect(MySQL(), dbname = "analy_dev", urname="root", password="Pa123456!",host="202.69.27.239",port=8443)
# rawdata_vi_to_mysql<-subt(rawdata_vi_to_mysql,rawdata_vi.prodID !=9999)
# dbWriteTable(conn, "rawdata_vi_to_mysql", rawdata_vi_to_mysql)
# dbDisconnect(conn)
#
#

本文发布于:2023-07-23 14:00:56,感谢您对本站的认可!

本文链接:https://www.wtabcd.cn/fanwen/fan/82/1112764.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:字符串   数据   产品   购买   函数   关注   英国
相关文章
留言与评论(共有 0 条评论)
   
验证码:
推荐文章
排行榜
Copyright ©2019-2022 Comsenz Inc.Powered by © 专利检索| 网站地图