appendix.qmd

# Performance 

Poner aqui el problema

## Data
### Direct: Download files

```{r}
#| echo: false
#| message: false
#| warning: false

options(scipen=999)
library(DT)
library(tidyverse)
data_table <- read_csv("data_table.csv") |> dplyr::select(c("file","section","url")) 
data_table$url <- paste0('<a href="', data_table$url,'">','Download: ', data_table$file, '</a>')
data_table |>
  DT::datatable(escape= FALSE)
```


### Indirect: Build from queries

meter tambien el od_2722 que sin eso no puedes construir

**Origin-Destination matrix**

```{sql}
#| eval: false
#| connection: eisenberg_connection
#| code-summary: "Create OD matrix weighted sampling"


---- Create 200 origin
CREATE TABLE sampling_weight_200_origin  AS
with porto_200_origin AS (
        SELECT
            * 
        FROM 
            od_2728_snapped_origin 
        ORDER BY random() LIMIT 200)
        SELECT * FROM  porto_200_origin;  
---- Create 200 destination
CREATE TABLE sampling_weight_200_destination  AS
with porto_200_destination AS (
        SELECT
            * 
        FROM 
            od_2728_snapped_origin 
        ORDER BY random() LIMIT 200)
        SELECT * FROM  porto_200_destination;  
```

**Creating table with x,y for pgr_astar()**

```{sql}
#| connection: eisenberg_connection
#| eval: false
#| code-summary: "Create network table with x,y for pgr_astar()"

CREATE TABLE porto_alegre_net_largest_astar AS		    
WITH porto_alegre_net_astart AS (
SELECT 
  *,
  st_startpoint(the_geom) AS start_pt,
  st_endpoint(the_geom) AS enstart_pt
FROM 
  porto_alegre_net_largest AS net)
SELECT *,
	st_x(start_pt) AS x1,
	st_y(start_pt) AS y1,
	st_x(end_pt) AS x2,
	st_y(end_pt) AS y2
FROM 
  porto_alegre_net_astart;
```


**Applying spatial index**

```{sql}
#| connection: eisenberg_connection
#| eval: false
#| code-summary: "Applying index to weighted OD matrix"

---- Create index for origin
CREATE INDEX idx_sampling_weight_200_origin_net_id ON sampling_weight_200_origin USING hash(net_id);
CREATE INDEX idx_sampling_weight_200_origin_geom ON sampling_weight_200_origin USING gist(the_geom);
---- Create index for destination
CREATE INDEX idx_sampling_weight_200_destination_net_id ON sampling_weight_200_destination USING hash(net_id);
CREATE INDEX idx_sampling_weight_200_destination_geom ON sampling_weight_200_destination USING gist(the_geom);
---- Cluster
CLUSTER porto_alegre_net_largest USING idx_porto_alegre_net_largest_geom;
---- Vacuum clean
VACUUM(full, ANALYZE) sampling_weight_200_origin;
VACUUM(full, ANALYZE) sampling_weight_200_destination;
VACUUM(full, ANALYZE) porto_alegre_net_largest;
```

```{sql}
#| connection: eisenberg_connection
#| eval: false
#| code-summary: "Applying spatial index in the network"
CREATE INDEX idx_porto_alegre_net_largest_geom ON porto_alegre_net_largest  USING GIST(the_geom);
CREATE INDEX idx_porto_alegre_net_largest_source ON porto_alegre_net_largest  USING btree(source);
CREATE INDEX idx_porto_alegre_net_largest_target ON porto_alegre_net_largest  USING btree(target);
CREATE INDEX idx_porto_alegre_net_largest_cost ON porto_alegre_net_largest  USING btree(cost);
CREATE INDEX idx_porto_alegre_net_largest_bidirectid ON porto_alegre_net_largest  USING btree(bidirectid);
```

```{sql}
#| connection: eisenberg_connection
#| eval: false
#| code-summary: "Create OD matrix regular sampling for agg_array()"

---- create origin_destination
CREATE TEMP TABLE vertices_lookup_10
AS             
WITH all_pairs AS (
  SELECT f.net_id AS fid, f.the_geom AS fgeom,
         t.net_id AS tid, t.the_geom AS tgeom
    FROM random_10_origin AS f,
         random_10_destination AS t
),
vertices AS (
  SELECT fid, tid,
     (SELECT id
        FROM porto_alegre_net_largest_vertices_pgr AS way
       ORDER BY way.the_geom <-> fgeom
       LIMIT 1) AS fv,
     (SELECT id
        FROM porto_alegre_net_largest_vertices_pgr AS way
       ORDER BY way.the_geom <-> tgeom
       LIMIT 1) AS tv
  FROM all_pairs
)
SELECT * FROM vertices;
---- Number of OD
SELECT count(*) FROM vertices_lookup_10;
```


```{sql}
#| connection: eisenberg_connection
#| eval: false
#| code-summary: "Applying index to network for astrar()"

---- adding spatial index
CREATE INDEX idx_porto_alegre_net_largest_astar_net_the_geom ON porto_alegre_net_largest_astar  USING gist(the_geom);
CREATE INDEX idx_porto_alegre_net_largest_astar_net_start ON porto_alegre_net_largest_astar  USING gist(start_pt);
CREATE INDEX idx_porto_alegre_net_largest_astar_net_end ON porto_alegre_net_largest_astar USING gist(end_pt);
CREATE INDEX idx_porto_alegre_net_largest_astar_net_x1 ON porto_alegre_net_largest_astar  USING btree(x1);
CREATE INDEX idx_porto_alegre_net_largest_astar_net_y1 ON porto_alegre_net_largest_astar USING btree(y1);
CREATE INDEX idx_porto_alegre_net_largest_astar_net_x2 ON porto_alegre_net_largest_astar USING btree(x2);
CREATE INDEX idx_porto_alegre_net_largest_astar_net_y2 ON porto_alegre_net_largest_astar USING btree(y2);                      
```


```{sql}
#| connection: eisenberg_connection
#| eval: false
#| code-summary:  "Applying index on vertices_lookup"

---- Create index
CREATE INDEX idx_vertices_lookup_10_fid ON vertices_lookup_10 USING hash(fid);
CREATE INDEX idx_vertices_lookup_10_tid ON vertices_lookup_10 USING hash(tid);
CREATE INDEX idx_vertices_lookup_10_fv ON vertices_lookup_10 USING hash(fv);
CREATE INDEX idx_vertices_lookup_10_tv ON vertices_lookup_10 USING hash(tv);
---- Vacuum and clean
VACUUM(full, ANALYZE) vertices_lookup_10;
VACUUM(full, ANALYZE) porto_alegre_net_largest;
```

## Routing

```{r}
#| warning: false
#| message: false
#| echo: false
#| include: false

lapply(c("readODS","tidyverse","ggplot2","DT"), require, character.only =TRUE)
performance <- readODS::read_ods("metrics.ods")
df <- performance |> dplyr::select(c("number_od","method","algorithm","time","max_centrality","count_rows","query","analyze")) |> filter(!is.na(time) & method %in% c('naive','array_agg','st_bbox'))
DT::datatable(subset(df, select=c("number_od","algorithm","time","query","analyze")), 
              class='compact', rownames=FALSE, escape=FALSE, caption='Data description',
              extensions=c("Buttons",'RowGroup'),
              options=list(
                  order=list(list(0, 'asc'), list(2,'asc')),  # Sort by the first column (index 0)
                  rowGroup=list(dataSrc=0),    # Fixed rowGroup
                  dom="Bfrtip",
                  columnDefs = list(list(visible=FALSE, targets= c(3,4))),
                  buttons=c("copy", "csv", "pdf"),
                  initComplete = JS(
                      "function(settings, json) {",
                      "$(this.api().table().header()).css({'background-color': '#d50038', 'color': '#fff'});",
                      "}")
              )
) |> 
    DT::formatStyle("time",
                    background=DT::styleColorBar(range(df$time), '#ee8b8b'),
                    backgroundSize='98% 88%',
                    backgroundRepeat='no-repeat',
                    backgroundPosition='center')
```

## Method

```{r}

data_dijkstra_method <- read.csv("/home/ricardo/heigit_bookdown/metrics_method_dijkstra.csv",sep=",")
ggplot(data_dijkstra_method, aes(x=number_od, y=time, group = method)) +
  labs(x="Number of Origin-Destination (OD)",
       y="time (ms)",
       title="Pgrouting pgr_dijkstra() performance",
       subtitle="using naive, bbox, array_ag() methods") +
  geom_line(aes(color=method)) +
   geom_point(aes(color=method)) +
  theme_minimal()
```


### Naive

Centrality based on

1. [Blog from Daniel I. Patterson](https://urbandatacyclist.com/2020/04/18/how-to-measure-centrality-among-bike-share-trips-using-pgrouting/)
2. [Book by Matt Forrest "Listing 3.26: Final query",page 435](https://locatepress.com/book/spatial-sql)

```{sql}
#| eval: false

--- Run query
EXPLAIN ANALYZE
 CREATE TABLE centrality_200_200_porto AS
 SELECT   b.ogc_fid,
 b.the_geom,
 count(the_geom) as centrality 
 FROM  pgr_dijkstra('SELECT  ogc_fid AS id,
							  fromid AS source,
							toid AS target,
							weight AS cost
				      FROM porto_alegre_net_largest',
				      ARRAY(SELECT net_id AS start_id FROM porto_200_origin  ),
				      ARRAY(SELECT net_id AS end_id FROM porto_200_destination ),
				      directed := TRUE) j
				      LEFT JOIN porto_alegre_net_largest AS b
				      ON j.edge = b.ogc_fid
				      GROUP BY  b.ogc_fid, b.the_geom
				      ORDER BY centrality DESC;  
```

### Method bounding box

Code based on:

1. [Stackoverflow from "simpleuser001" ](https://gis.stackexchange.com/questions/69722/why-is-any-pgr-routing-function-taking-forever-based-on-osm-data-in-an-pgrouti/69825#69825)

```{sql}
#| eval: false

EXPLAIN ANALYZE 
CREATE TABLE centrality_200_200_porto_astrar_bbox AS 
SELECT    
	b.id,
	b.the_geom,
	count(the_geom) as centrality
FROM 
	pgr_astar(  
		'SELECT id,
			    source,
			    target,
			    cost,
			    x1,
			    y1,
			    x2,
			    y2  
			FROM 
				porto_alegre_net_pre_component_one_star
			WHERE the_geom && (SELECT box FROM bbox)',
			 ARRAY(SELECT net_id FROM weight_sampling_200_origin),
			 ARRAY(SELECT net_id FROM weight_sampling_200_destination),
			directed:=TRUE,
			heuristic:=2) j
			LEFT JOIN 
				porto_alegre_net_pre_component_one_star AS b
			ON 
				j.edge = b.id
			GROUP BY  
				b.id,
				b.the_geom
			ORDER BY 
				centrality DESC;   

```

### Method array_agg()

Code based on:

1. [Stackoverflow from "Timothy Dalton"](https://gis.stackexchange.com/questions/432035/pgrouting-computation-time-code-efficiency)

```{sql}
#| eval: false

---- Run query using array_agg()
EXPLAIN ANALYZE
CREATE TABLE porto_100_dijkstra_agg AS
WITH pgr_result AS (
  SELECT pgr_dijkstra('SELECT ogc_fid AS id,
	         fromid AS source,
			toid AS target,
			 weight AS cost FROM porto_alegre_net_largest',
    array_agg(fv), array_agg(tv), 
    directed := true
  ) FROM vertices_lookup_10 
) 
SELECT 
 b.ogc_fid,
 b.the_geom,
 count(the_geom) as centrality 
FROM pgr_result
LEFT JOIN porto_alegre_net_largest AS b
ON (pgr_dijkstra).edge = b.ogc_fid
GROUP BY 
	the_geom, b.ogc_fid
ORDER BY 
	centrality DESC;
---- Max centrality value
select max(centrality) FROM porto_100_dijkstra_agg ;
---- Number of rows
select count(*) FROM porto_100_dijkstra_agg ;
```

## Algorithm

### pgr_dijkstra


```{r}
data_algorithms_naive <- read.csv("/home/ricardo/heigit_bookdown/metrics_algorithm_naive.csv", sep=",")
ggplot(data_algorithms_naive,aes(x=number_od, y=time, group = algorithm)) +
  labs(x="Number of Origin-Destination (OD)",
       y="time (ms)",
       title="Pgrouting performance",
       subtitle="Naive method on dijkstra,bddijkstra, astar, bdastar") +
  geom_line(aes(color=algorithm)) +
   geom_point(aes(color=algorithm)) +
  theme_minimal()
```

### pgr_astrar()

```{sql}
#| eval: false

EXPLAIN ANALYZE
CREATE TABLE centrality_10_10_porto_astrar AS
SELECT  
	b.ogc_fid,
	 b.the_geom,
 	count(the_geom) as centrality 
FROM pgr_astar(
	'SELECT ogc_fid AS id,
			fromid AS source,
			toid AS target,
			weight AS cost,
			x1,
			y1,
			x2,
			y2
	FROM porto_alegre_net_largest_astar',
	ARRAY(SELECT net_id FROM  random_10_origin),
	ARRAY(SELECT net_id FROM  random_10_destination),
		     directed:=TRUE,
		     heuristic:=2) j
                      left JOIN porto_alegre_net_largest_astar AS b
                      ON j.edge = b.ogc_fid
                      GROUP BY  b.ogc_fid, b.the_geom
                      ORDER BY centrality DESC;  

--- check max centrality                     
SELECT max(centrality) FROM centrality_10_10_porto_astrar ;
--- check max rows
SELECT  count(*) FROM centrality_10_10_porto_astrar;
```

### pgr_bdijkstra
```{sql}
#| eval: false
#| connection: eisenberg_connection

```


### pgr_bdastar()
```{sql}
#| connection: eisenberg_connection
#| eval: false

EXPLAIN ANALYZE
CREATE TABLE centrality_100_100_porto_bdastrar AS 
SELECT    b.ogc_fid,
          b.the_geom,
          count(the_geom) as centrality
FROM 
  pgr_bdAstar(
      'SELECT
          ogc_fid AS id,
          fromid AS source,
          toid AS target,
          weight AS cost,
          x1,
          y1,
          x2,
          y2  
  FROM
    porto_alegre_net_largest_astar',
    ARRAY(SELECT net_id FROM  random_100_origin),
    ARRAY(SELECT net_id FROM  random_100_destination),
    directed:=TRUE,
    heuristic:=2) j
    LEFT JOIN 
      porto_alegre_net_largest_astar AS b
      ON j.edge = b.ogc_fid
      GROUP BY  b.ogc_fid, b.the_geom
      ORDER BY centrality DESC;  
```

### Other code 

```{sql}
#| eval: false
#| connection: eisenberg_connection

CREATE TABLE od_40420_snapped_origin AS
SELECT DISTINCT ON (net.id)
       pt.id AS pt_id,
       net.id AS net_id,
       net.the_geom
FROM 
(select * 
FROM 
    od_77763 as pt) as pt
CROSS JOIN
LATERAL (SELECT
        * 
        FROM porto_alegre_net_largest_vertices_pgr AS net
         ORDER BY net.the_geom <-> pt.geometry 
        LIMIT 1) AS net;

```

```{sql}
#| eval: false
#| connection: eisenberg_connection

CREATE TABLE random_272_destination  AS
with random_272_destination AS (
        SELECT
            * 
        FROM 
            od_40420_snapped_origin 
        ORDER BY random() LIMIT 200)
        SELECT * FROM  random_272_destination;  

```

```{sql}
#| eval: false
#| connection: eisenberg_connection

CREATE TEMP TABLE vertices_lookup_v5 
AS             
WITH all_pairs AS (
  SELECT f.net_id AS fid, f.the_geom as fgeom,
         t.net_id AS tid, t.the_geom as tgeom
    FROM random_272_origin AS f,
         random_272_destination AS t
),
vertices AS (
  SELECT fid, tid,
     (SELECT id
        FROM porto_alegre_net_largest_vertices_pgr AS way
       ORDER BY way.the_geom <-> fgeom
       LIMIT 1) as fv,
     (SELECT id
        FROM porto_alegre_net_largest_vertices_pgr AS way
       ORDER BY way.the_geom <-> tgeom
       LIMIT 1) as tv
  FROM all_pairs
)
SELECT * FROM vertices;

```

```{sql}
#| eval: false
#| connection: eisenberg_connection

CREATE TABLE porto_272_272_dijkstra AS
 WITH pgr_result AS (
   SELECT pgr_dijkstra('SELECT id,
           source,
    target,
     cost FROM porto_alegre_net_largest',
     array_agg(fv), array_agg(tv), 
     directed := true
   ) FROM vertices_lookup_v5
 )
SELECT (pgr_dijkstra).*, a.fid, a.tid FROM pgr_result
JOIN vertices_lookup_v5 a
ON (pgr_dijkstra).start_vid = a.fv
AND (pgr_dijkstra).end_vid = a.tv;

```

```{sql}
#| eval: false
#| connection: eisenberg_connection

CREATE TEMP TABLE vertices_lookup_v5                     
AS             
WITH all_pairs AS (
  SELECT f.net_id AS fid, f.the_geom as fgeom,
         t.net_id AS tid, t.the_geom as tgeom
    FROM random_272_origin AS f,                                                                      
         random_272_destination AS t               
),                                                                                     
vertices AS (
  SELECT fid, tid,
     (SELECT id
        FROM porto_alegre_net_largest_vertices_pgr AS way
       ORDER BY way.the_geom <-> fgeom
       LIMIT 1) as fv,
     (SELECT id
        FROM porto_alegre_net_largest_vertices_pgr AS way
       ORDER BY way.the_geom <-> tgeom
       LIMIT 1) as tv
  FROM all_pairs
)
SELECT * FROM vertices;

```

```{sql}
#| eval: false
#| connection: eisenberg_connection


CREATE TEMP TABLE vertices_lookup_v5                     
AS             
WITH all_pairs AS (
  SELECT f.net_id AS fid, f.the_geom as fgeom,
         t.net_id AS tid, t.the_geom as tgeom
    FROM random_272_origin AS f,                                                                      
         random_272_destination AS t               
),                                                                                     
vertices AS (
  SELECT fid, tid,
     (SELECT id
        FROM porto_alegre_net_largest_vertices_pgr AS way
       ORDER BY way.the_geom <-> fgeom
       LIMIT 1) as fv,
     (SELECT id
        FROM porto_alegre_net_largest_vertices_pgr AS way
       ORDER BY way.the_geom <-> tgeom
       LIMIT 1) as tv
  FROM all_pairs
)
SELECT * FROM vertices;

```