Ami*_*hak 3 r spatial left-join dplyr
我试图在参考我的原始 data.frame 的 data.frame 中找到最接近的 LAT_LON 的 ID。我已经通过将 data.frames 合并到一个唯一标识符上并根据distHaverSine来自geosphere. 现在,我想更进一步,加入没有唯一标识符的 data.frames 并找到最接近 LAT-LON 的 ID。合并后我使用了以下代码:
v3 <-v2 %>% mutate(CTD = distHaversine(cbind(LON.x, LAT.x), cbind(LON.y, LAT.y)))
数据:
loc <- data.frame(station = c('Baker Street','Bank'),
lat = c(51.522236,51.5134047),
lng = c(-0.157080, -0.08905843),
postcode = c('NW1','EC3V'))
Run Code Online (Sandbox Code Playgroud)
stop <- data.frame(station = c('Angel','Barbican','Barons Court','Bayswater'),
lat = c(51.53253,51.520865,51.490281,51.51224),
lng = c(-0.10579,-0.097758,-0.214340,-0.187569),
postcode = c('EC1V','EC1A', 'W14', 'W2'))
Run Code Online (Sandbox Code Playgroud)
作为最终结果,我想要这样的东西:
df <- data.frame(loc = c('Baker Street','Bank','Baker Street','Bank','Baker Street','Bank','Baker
Street','Bank'),
stop = c('Angel','Barbican','Barons Court','Bayswater','Angel','Barbican','Barons Court','Bayswater'),
dist = c('x','x','x','x','x','x','x','x'),
lat = c(51.53253,51.520865,51.490281,51.51224,51.53253,51.520865,51.490281,51.51224),
lng = c(-0.10579,-0.097758,-0.214340,-0.187569,-0.10579,-0.097758,-0.214340,-0.187569),
postcode = c('EC1V','EC1A', 'W14', 'W2','EC1V','EC1A', 'W14', 'W2')
)
Run Code Online (Sandbox Code Playgroud)
任何帮助表示赞赏。谢谢。
由于物体之间的距离很小,我们可以通过使用坐标之间的欧几里得距离来加快计算速度。由于我们不在赤道附近,lng 坐标被压缩了一点;我们可以通过稍微缩放 lng 来使比较稍微好一点。
cor_stop <- stop[, c("lat", "lng")]
cor_stop$lng <- cor_stop$lng * sin(mean(cor_stop$lat, na.rm = TRUE)/180*pi)
cor_loc <- loc[, c("lat", "lng")]
cor_loc$lng <- cor_loc$lng * sin(mean(cor_loc$lat, na.rm = TRUE)/180*pi)
Run Code Online (Sandbox Code Playgroud)
然后我们可以使用FNN包计算每个位置的最近停靠点,该包使用基于树的搜索来快速找到最近的 K 个邻居。这应该扩展到大数据集(我已经将它用于具有数百万条记录的数据集):
library(FNN)
matches <- knnx.index(cor_stop, cor_loc, k = 1)
matches
Run Code Online (Sandbox Code Playgroud)
## [,1]
## [1,] 4
## [2,] 2
Run Code Online (Sandbox Code Playgroud)
然后我们可以构建最终结果:
library(FNN)
matches <- knnx.index(cor_stop, cor_loc, k = 1)
matches
Run Code Online (Sandbox Code Playgroud)
并计算实际距离:
## [,1]
## [1,] 4
## [2,] 2
Run Code Online (Sandbox Code Playgroud)
## station lat lng postcode stop_station stop_lat stop_lng
## 1 Baker Street 51.52224 -0.15708000 NW1 Bayswater 51.51224 -0.187569
## 2 Bank 51.51340 -0.08905843 EC3V Barbican 51.52087 -0.097758
## stop_postcode dist
## 1 W2 2387.231
## 2 EC1A 1026.091
Run Code Online (Sandbox Code Playgroud)
我不确定经纬度中最近的点也是“鸟飞时”的最近点,您可以使用此方法首先选择经纬度中最近的 K 个点;然后计算这些点的距离,然后选择最近的点。
所有连接、距离计算和绘图都可以使用可用的 R 包来完成。
library(tidyverse)
library(sf)
#> Linking to GEOS 3.6.2, GDAL 2.2.3, PROJ 4.9.3
library(nngeo)
library(mapview)
## Original data
loc <- data.frame(station = c('Baker Street','Bank'),
lat = c(51.522236,51.5134047),
lng = c(-0.157080, -0.08905843),
postcode = c('NW1','EC3V'))
stop <- data.frame(station = c('Angel','Barbican','Barons Court','Bayswater'),
lat = c(51.53253,51.520865,51.490281,51.51224),
lng = c(-0.10579,-0.097758,-0.214340,-0.187569),
postcode = c('EC1V','EC1A', 'W14', 'W2'))
df <- data.frame(loc = c('Baker Street','Bank','Baker Street','Bank','Baker Street','Bank','Baker
Street','Bank'),
stop = c('Angel','Barbican','Barons Court','Bayswater','Angel','Barbican','Barons Court','Bayswater'),
dist = c('x','x','x','x','x','x','x','x'),
lat = c(51.53253,51.520865,51.490281,51.51224,51.53253,51.520865,51.490281,51.51224),
lng = c(-0.10579,-0.097758,-0.214340,-0.187569,-0.10579,-0.097758,-0.214340,-0.187569),
postcode = c('EC1V','EC1A', 'W14', 'W2','EC1V','EC1A', 'W14', 'W2')
)
## Create sf objects from lat/lon points
loc_sf <- loc %>% st_as_sf(coords = c('lng', 'lat'), remove = T) %>%
st_set_crs(4326)
stop_sf <- stop %>% st_as_sf(coords = c('lng', 'lat'), remove = T) %>%
st_set_crs(4326)
# Use st_nearest_feature to cbind loc to stop by nearest points
joined_sf <- stop_sf %>%
cbind(
loc_sf[st_nearest_feature(stop_sf, loc_sf),])
## mutate to add column showing distance between geometries
joined_sf %>%
mutate(dist = st_distance(geometry, geometry.1, by_element = T))
#> Simple feature collection with 4 features and 5 fields
#> Active geometry column: geometry
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: -0.21434 ymin: 51.49028 xmax: -0.097758 ymax: 51.53253
#> epsg (SRID): 4326
#> proj4string: +proj=longlat +datum=WGS84 +no_defs
#> station postcode station.1 postcode.1 geometry
#> 1 Angel EC1V Bank EC3V POINT (-0.10579 51.53253)
#> 2 Barbican EC1A Bank EC3V POINT (-0.097758 51.52087)
#> 3 Barons Court W14 Baker Street NW1 POINT (-0.21434 51.49028)
#> 4 Bayswater W2 Baker Street NW1 POINT (-0.187569 51.51224)
#> geometry.1 dist
#> 1 POINT (-0.08905843 51.5134) 2424.102 [m]
#> 2 POINT (-0.08905843 51.5134) 1026.449 [m]
#> 3 POINT (-0.15708 51.52224) 5333.417 [m]
#> 4 POINT (-0.15708 51.52224) 2390.791 [m]
## Use nngeo and mapview to plot lines on a map
# NOT run for reprex, output image attached
#connected <- st_connect(stop_sf, loc_sf)
# mapview(connected) +
# mapview(loc_sf, color = 'red') +
# mapview(stop_sf, color = 'black')
Run Code Online (Sandbox Code Playgroud)
由reprex 包(v0.3.0)于 2020 年 1 月 21 日创建