I was curious to see just how many supermarkets we have in London and used my Google Places-scraping script to mine for “food” to extract the coordinates for 10,587 stores.

All the (non-independent) stores in London are below:

Supermarkets GLA (all)

I decided that even after removing local corner-shops there were still too many and trimmed the list down a bit more:

Supermarkets GLA (Select)

Both of the plots contain Voronoi diagrams to illustrate the catchment areas of the shops – e.g.:

Basically the diagram creates an area around a point which is by definition closest to that point than to any other point. One can imagine an animation where circles are spawned from all points on the diagram and start growing at the same rate, once a circle touches another that part of the perimeter stops growing, however the other parts continue. What this means is that we get an area which “belongs” to Tesco. For a more detailed discussion about Voronoi Diagrams and Delaunay Triangulation (which can be used as a way to generate the concave hull for isochrones) I found this link very interesting.

Finally, after producing the diagrams I intersected the supermarkets with London Boroughs to discover what the most popular supermarket was – no surprise that it was Tesco (Wandsworth was the only Sainsbury’s borough!)

R-Script:

library(rgeos)
library(rgdal)
library(ggplot2)
library(sp)
####################################################################################################

# Shape-files stored here
my_shape_files_loc = '.../3. Supermarket Chain Voronoi'
setwd(my_shape_files_loc)

# Base-map (Greater London)
london_shape_loc <- paste(my_shape_files_loc,'/london_sport',sep='')
london <- readOGR(london_shape_loc, layer="london_sport") 
london <- spTransform(london, CRS("+proj=longlat +datum=WGS84"))

# Remove boundaries (just London, not boroughs)
london_no_bounds <- gUnion(london, london)

# Stores (produced using Python Google-Places scraping script)
stores <- read.csv("food store_python_mined.csv", header=FALSE)
names(stores) <- c("Name", "Address", "Latitude", "Longitude", "Type", "Places_ID")

# Extract most supermarkets using grep
stores$Supermarket[grepl("waitrose",stores$Name, ignore.case = TRUE)] <- "Waitrose"
stores$Supermarket[grepl("sainsbury",stores$Name, ignore.case = TRUE)] <- "Sainsbury's"
stores$Supermarket[grepl("morrison",stores$Name, ignore.case = TRUE)] <- "Morrisons"
stores$Supermarket[((grepl("mark",stores$Name, ignore.case = TRUE)&
                   grepl("spencer",stores$Name, ignore.case = TRUE))|
                   grepl("M & S",stores$Name, ignore.case = TRUE))] <- "Marks & Spencer"
stores$Supermarket[(grepl("co",stores$Name, ignore.case = TRUE)&
                   grepl("operative",stores$Name, ignore.case = TRUE))] <- "Co-operative Group"
stores$Supermarket[grepl("asda",stores$Name, ignore.case = TRUE)] <- "Asda"
#stores$Supermarket[grepl("aldi",stores$Name, ignore.case = TRUE)] <- "Aldi"
stores$Supermarket[grepl("lidl",stores$Name, ignore.case = TRUE)] <- "Lidl"
stores$Supermarket[grepl("iceland",stores$Name, ignore.case = TRUE)] <- "Iceland"
stores$Supermarket[grepl("londis",stores$Name, ignore.case = TRUE)] <- "Londis"
#stores$Supermarket[grepl("esso",stores$Name, ignore.case = TRUE)] <- "Esso"
stores$Supermarket[grepl("tesco",stores$Name, ignore.case = TRUE)] <- "Tesco"
stores$Supermarket[grepl("costcutter",stores$Name, ignore.case = TRUE)] <- "Costcutter"
stores$Supermarket[grepl("budgens",stores$Name, ignore.case = TRUE)] <- "Budgens"
stores$Supermarket[is.na(stores$Supermarket)] <- "Other"

# Preview
write.table(stores,"test_regex.csv", row.names = FALSE, sep = ",")

library(plyr)
top_stores <- as.data.frame(count(stores$Supermarket))
top_stores <- top_stores[order(-top_stores$freq),] 

# Extract top stores
stores_plot <- stores[stores$Supermarket!="Other",]

# De-duplicate by Latitude/Longitude
unq_stores<- stores_plot[,c("Supermarket", "Latitude", "Longitude")]
unq_stores <- unq_stores[!duplicated(unq_stores[c("Latitude", "Longitude")]),]
unq_stores_plot <- unq_stores

# Set projection of data
coordinates(unq_stores) <- c("Longitude", "Latitude")
proj4string(unq_stores) <- CRS('+proj=longlat +datum=WGS84')
plot(unq_stores)

########################################
## Voronoi Polygons
########################################

voronoipolygons = function(layer) {
  # This function was not originally written by me
  require(deldir)
  crds = layer@coords
  # Added RW to prevent clipping rw=c(-1,1,51,52)
  # Should really write a function to generate a bounding box
  # for the data automatically and pass that.
  # The reason for doing is that if our supermarkers don't cover
  # The "London" shapefile i.e. north London has none; then neither will the
  # voronoi polygons. Whereas we want to show that if there are no supermarkets
  # in north London, then the closest supermarket is somewhere south
  z = deldir(crds[,1], crds[,2],rw=c(-1,1,51,52))
  w = tile.list(z)
  polys = vector(mode='list', length=length(w))
  require(sp)
  for (i in seq(along=polys)) {
    pcrds = cbind(w[[i]]$x, w[[i]]$y)
    pcrds = rbind(pcrds, pcrds[1,])
    polys[[i]] = Polygons(list(Polygon(pcrds)), ID=as.character(i))
  }
  SP = SpatialPolygons(polys)
  voronoi = SpatialPolygonsDataFrame(SP, data=data.frame(x=crds[,1], 
                                                         y=crds[,2], row.names=sapply(slot(SP, 'polygons'), 
                                                                                      function(x) slot(x, 'ID'))))
}

####################################################################################################

# Create Voronoi Polygons
stores_plot_vor <- voronoipolygons(unq_stores)
proj4string(stores_plot_vor) <- CRS('+proj=longlat +datum=WGS84')
plot(stores_plot_vor)

# Merge on supermarket information from points to voronoi polygons
# The stores_plot_vor_df will contain only the 'shape/geometry' information and
# so we need to add the 'database' information
stores_plot_vor_df <- fortify(stores_plot_vor)

# Intersect polygons (voronoi and london)
# Basically we want to clip the rectangle of voronoi polygons to just
# the Polygon of London
clip <- gIntersection(stores_plot_vor,london_no_bounds, byid=TRUE, drop_lower_td=TRUE)
clip_df <- fortify(clip)

# The gIntersection command strips away the accompanying data
# So we merge it back on using the IDs

# [A]: SHAPE
clip_df$merge_id <- gsub(" 1", "", clip_df$id)

# [B]: DATA
unq_stores_df <- as.data.frame(unq_stores)
# Add the row number as an ID to merge on
unq_stores_df$merge_id <- 1:nrow(unq_stores_df)
# We just want the supermarket name to be mergd on
unq_stores_df <- unq_stores_df[c("Supermarket", "merge_id")]
# Outer merge by ID (keep missing)
clipped_voronoi <- merge(clip_df, unq_stores_df, by='merge_id', all.x=TRUE)
# Check all match (i.e. if keep missing and there are none, then OK)
assertthat::noNA(clipped_voronoi$Supermarket)

# Remove axis
xquiet<- scale_x_continuous("", breaks=NULL)
yquiet<-scale_y_continuous("", breaks=NULL)
quiet<-list(xquiet, yquiet)

# ADD Water (clipped from OS)
rivers_loc <- paste(my_shape_files_loc,'/os_vector_map_merged',sep='')
rivers <- readOGR(rivers_loc, layer="TQ_TidalWater_Merge_Clip") 
rivers <- spTransform(rivers, CRS("+proj=longlat +datum=WGS84"))
#plot(rivers)

#Plot 
ggplot() +
  # Base
  geom_polygon(data=clipped_voronoi, aes(long, lat, group=group, fill=Supermarket),
               colour = NA, size=0.1, alpha=0.95) +
  
  scale_fill_manual(values=c(
  "Tesco"="#00539F",
  "Londis"="#3FD5A9",
  "Sainsbury's"="#EF8C00",
  "Costcutter"="#FF45D1",
  "Co-operative Group"="#88FF4B",
  "Iceland"="#7B0099",
  "Waitrose"="#5C8018",
  "Marks & Spencer"="#0A0D10",
  "Lidl"="#FE0002",
  "Morrisons"="#F4F51E",
  "Budgens"="#53A633",
  "Asda"="#D7E9E9")) +
  
  geom_polygon(data=london, aes(long, lat, group=group),
               fill = NA, color = 'white', size=0.4, alpha=1)+
  
  # Points
  geom_point(data=unq_stores_plot, aes(x=Longitude, y=Latitude),
             colour='black', alpha=1, size=1)  +
  
  # River as background
  geom_polygon(data=rivers, aes(long, lat, group=group),
               color='#ADD8E6', fill='#ADD8E6', size=1, alpha=1)+
  
ggsave(file='supermarkets_out_all.png', width=30,height=15,dpi=360)

########################################
## Count by Borough 
## (Out of Interest: Most Popular for each Borough)
########################################

# [A] We intersect the two spatial objects (unq_stores) and (london)
# Which maps on the AREA i.e. Borough from London to the data
# in our stores database
print(london$name)
# 33 Boroughs
stores_by_borough <- gIntersection(unq_stores,london, byid=TRUE)
# Stores outside of london are now dropped
# Convert to data-frame the stores in london
stores_by_borough_df <- as.data.frame(stores_by_borough)
# We just need to merge on the store information
# Which we will do using the rownames
# The rownames correspond to ID_in_unq_stores+"space"+ID_area_in_London
stores_by_borough_df$area <- rownames(stores_by_borough_df)
# We just want to isolate the ID_area_in_London and then use that
# to merge on the area names
stores_by_borough_df$area <- sub("([0-9]+[ ])","",stores_by_borough_df$area)

# [B] Merge on store name by coordinates (either by first part of row-name or just
# latitude and longitude which are unique)
# I use lat/long:
brands_area <- merge(stores_by_borough_df, unq_stores_plot,
                     by.x = c("x","y"), by.y = c("Longitude", "Latitude"), all = TRUE)
# Drop those not in London (although we could have used an inner-join and specified
# all.x = TRUE
brands_area <- brands_area[!is.na(brands_area$area),]

# [C] Generate a count by supermarket for each area
brands_area$c <- 1
brands_area_sum <- ddply(brands_area, c("area", "Supermarket"), summarize, count=sum(c)) 

# [D] Keep top supermarket for each borough
brands_area_sum <- brands_area_sum[order(-brands_area_sum$count), ]
d <- by(brands_area_sum, brands_area_sum["area"], head, n=1)
d_df <- Reduce(rbind, d)
# Tesco, tesco, tesco, boring ...

# [E] Merge on names of Borough
names <- as.data.frame(london$name)
names$area <- 1:nrow(names)
# Subtract 1 as count starts from 0
names$area <- names$area - 1
shop_by_borough <- merge(names, d_df, by='area', all=TRUE)

# [F] Inspect final data
shop_by_borough <- shop_by_borough[order(-shop_by_borough$count),]
print(shop_by_borough)
#     area            london$name        Supermarket  count
# 30   29            Westminster              Tesco    34
# 32   31          Tower Hamlets              Tesco    32
# 11   10                Lambeth              Tesco    27
# 12   11              Southwark              Tesco    27
# 18   17                 Harrow              Tesco    26
# 16   15 Hammersmith and Fulham              Tesco    25
# 22   21                 Newham              Tesco    25
# 20   19              Islington              Tesco    24
# 31   30                 Camden              Tesco    24
# 9     8             Wandsworth        Sainsbury's    23
# ...