left_join 基于 R 中最近的 LAT_LON

Ami*_*hak 3 r spatial left-join dplyr

我试图在参考我的原始 data.frame 的 data.frame 中找到最接近的 LAT_LON 的 ID。我已经通过将 data.frames 合并到一个唯一标识符上并根据distHaverSine来自geosphere. 现在,我想更进一步,加入没有唯一标识符的 data.frames 并找到最接近 LAT-LON 的 ID。合并后我使用了以下代码:

v3 <-v2 %>% mutate(CTD = distHaversine(cbind(LON.x, LAT.x), cbind(LON.y, LAT.y)))

数据:

loc <- data.frame(station = c('Baker Street','Bank'),
     lat = c(51.522236,51.5134047),
     lng = c(-0.157080, -0.08905843),
               postcode = c('NW1','EC3V'))
Run Code Online (Sandbox Code Playgroud)
stop <- data.frame(station = c('Angel','Barbican','Barons Court','Bayswater'),
                lat = c(51.53253,51.520865,51.490281,51.51224),
                lng = c(-0.10579,-0.097758,-0.214340,-0.187569),
                postcode = c('EC1V','EC1A', 'W14', 'W2'))

Run Code Online (Sandbox Code Playgroud)

作为最终结果,我想要这样的东西:

df <- data.frame(loc = c('Baker Street','Bank','Baker Street','Bank','Baker Street','Bank','Baker 
        Street','Bank'), 
              stop = c('Angel','Barbican','Barons Court','Bayswater','Angel','Barbican','Barons Court','Bayswater'), 
              dist = c('x','x','x','x','x','x','x','x'), 
              lat = c(51.53253,51.520865,51.490281,51.51224,51.53253,51.520865,51.490281,51.51224), 
              lng = c(-0.10579,-0.097758,-0.214340,-0.187569,-0.10579,-0.097758,-0.214340,-0.187569),
              postcode = c('EC1V','EC1A', 'W14', 'W2','EC1V','EC1A', 'W14', 'W2')
              )

Run Code Online (Sandbox Code Playgroud)

任何帮助表示赞赏。谢谢。

Jan*_*aan 5

由于物体之间的距离很小,我们可以通过使用坐标之间的欧几里得距离来加快计算速度。由于我们不在赤道附近,lng 坐标被压缩了一点;我们可以通过稍微缩放 lng 来使比较稍微好一点。

cor_stop <- stop[, c("lat", "lng")]
cor_stop$lng <- cor_stop$lng * sin(mean(cor_stop$lat, na.rm = TRUE)/180*pi)
cor_loc <- loc[, c("lat", "lng")]
cor_loc$lng <- cor_loc$lng * sin(mean(cor_loc$lat, na.rm = TRUE)/180*pi)
Run Code Online (Sandbox Code Playgroud)

然后我们可以使用FNN包计算每个位置的最近停靠点,该包使用基于树的搜索来快速找到最近的 K 个邻居。这应该扩展到大数据集(我已经将它用于具有数百万条记录的数据集):

library(FNN)
matches <- knnx.index(cor_stop, cor_loc, k = 1)
matches
Run Code Online (Sandbox Code Playgroud)
##      [,1]
## [1,]    4
## [2,]    2
Run Code Online (Sandbox Code Playgroud)

然后我们可以构建最终结果:

library(FNN)
matches <- knnx.index(cor_stop, cor_loc, k = 1)
matches
Run Code Online (Sandbox Code Playgroud)

并计算实际距离:

##      [,1]
## [1,]    4
## [2,]    2
Run Code Online (Sandbox Code Playgroud)
##          station      lat         lng postcode stop_station stop_lat  stop_lng
## 1 Baker Street 51.52224 -0.15708000      NW1    Bayswater 51.51224 -0.187569
## 2         Bank 51.51340 -0.08905843     EC3V     Barbican 51.52087 -0.097758
##   stop_postcode     dist
## 1            W2 2387.231
## 2          EC1A 1026.091
Run Code Online (Sandbox Code Playgroud)

我不确定经纬度中最近的点也是“鸟飞时”的最近点,您可以使用此方法首先选择经纬度中最近的 K 个点;然后计算这些点的距离,然后选择最近的点。


mrh*_*ann 5

所有连接、距离计算和绘图都可以使用可用的 R 包来完成。

library(tidyverse)
library(sf)
#> Linking to GEOS 3.6.2, GDAL 2.2.3, PROJ 4.9.3
library(nngeo)
library(mapview)

## Original data
loc <- data.frame(station = c('Baker Street','Bank'),
                  lat = c(51.522236,51.5134047),
                  lng = c(-0.157080, -0.08905843),
                  postcode = c('NW1','EC3V'))

stop <- data.frame(station = c('Angel','Barbican','Barons Court','Bayswater'),
                   lat = c(51.53253,51.520865,51.490281,51.51224),
                   lng = c(-0.10579,-0.097758,-0.214340,-0.187569),
                   postcode = c('EC1V','EC1A', 'W14', 'W2'))

df <- data.frame(loc = c('Baker Street','Bank','Baker Street','Bank','Baker Street','Bank','Baker 
        Street','Bank'), 
                 stop = c('Angel','Barbican','Barons Court','Bayswater','Angel','Barbican','Barons Court','Bayswater'), 
                 dist = c('x','x','x','x','x','x','x','x'), 
                 lat = c(51.53253,51.520865,51.490281,51.51224,51.53253,51.520865,51.490281,51.51224), 
                 lng = c(-0.10579,-0.097758,-0.214340,-0.187569,-0.10579,-0.097758,-0.214340,-0.187569),
                 postcode = c('EC1V','EC1A', 'W14', 'W2','EC1V','EC1A', 'W14', 'W2')
)



## Create sf objects from lat/lon points
loc_sf <- loc %>% st_as_sf(coords = c('lng', 'lat'), remove = T) %>%
  st_set_crs(4326) 

stop_sf <- stop %>% st_as_sf(coords = c('lng', 'lat'), remove = T) %>%
  st_set_crs(4326) 


# Use st_nearest_feature to cbind loc to stop by nearest points
joined_sf <- stop_sf %>% 
  cbind(
    loc_sf[st_nearest_feature(stop_sf, loc_sf),])


## mutate to add column showing distance between geometries
joined_sf %>%
  mutate(dist = st_distance(geometry, geometry.1, by_element = T))
#> Simple feature collection with 4 features and 5 fields
#> Active geometry column: geometry
#> geometry type:  POINT
#> dimension:      XY
#> bbox:           xmin: -0.21434 ymin: 51.49028 xmax: -0.097758 ymax: 51.53253
#> epsg (SRID):    4326
#> proj4string:    +proj=longlat +datum=WGS84 +no_defs
#>        station postcode    station.1 postcode.1                   geometry
#> 1        Angel     EC1V         Bank       EC3V  POINT (-0.10579 51.53253)
#> 2     Barbican     EC1A         Bank       EC3V POINT (-0.097758 51.52087)
#> 3 Barons Court      W14 Baker Street        NW1  POINT (-0.21434 51.49028)
#> 4    Bayswater       W2 Baker Street        NW1 POINT (-0.187569 51.51224)
#>                    geometry.1         dist
#> 1 POINT (-0.08905843 51.5134) 2424.102 [m]
#> 2 POINT (-0.08905843 51.5134) 1026.449 [m]
#> 3   POINT (-0.15708 51.52224) 5333.417 [m]
#> 4   POINT (-0.15708 51.52224) 2390.791 [m]



## Use nngeo and mapview to plot lines on a map
# NOT run for reprex, output image attached 
#connected <- st_connect(stop_sf, loc_sf)
# mapview(connected) + 
#   mapview(loc_sf, color = 'red') +
#   mapview(stop_sf, color = 'black')
Run Code Online (Sandbox Code Playgroud)

reprex 包(v0.3.0)于 2020 年 1 月 21 日创建

在此处输入图片说明