Spatial Analysis of Atmospheric Pressure Data Along a Road Segment

Measuring atmospheric pressure along road segments is crucial for understanding microclimate variations, improving weather forecasts, and enhancing road safety. This data, collected using barometric sensors, GPS, and Automated Weather Stations (AWS), helps monitor environmental conditions and detect weather-related hazards.

I have some measurements of atmospheric pressure along a road segment and wish to map and analyze them. Each measurement includes a timestamp, coordinates, and pressure values. This blog will provide three separate Python scripts that accomplish the following tasks:

Sample the Data at a Minimum Spacing: Remove near-duplicate readings by keeping only points that lie at least a chosen distance (5-10 meters) from the previously kept point.
Create a Map of Measurements: Visualize the measurements along the road segment, using interpolation if necessary to account for fewer data points in future collections.
Perform Geo-Temporal Clustering Analysis: Analyze the data to identify clusters of similar atmospheric pressure readings over time and space.

Download the dataset used in this tutorial here.

Here’s a step-by-step tutorial based on our requirements and provided Python code:

Step 1: Sampling Data at Equal Distances

The goal is to reduce data points to avoid duplication, sampling them at a distance threshold of 5-10 meters.

Code:

import pandas as pd
import numpy as np
from geopy.distance import geodesic

# Load the raw measurements. The code in this tutorial reads the 'Lat',
# 'Long', 'Pressure (hPa)' and 'Timestamp' columns of this table.
data = pd.read_csv('sample_baro.csv')


#  loading the data and sampling it at equal distances.

def haversine(lat1, lon1, lat2, lon2):
    """Return the distance in meters between two (lat, lon) points.

    Despite the name, the distance is obtained from geopy's ``geodesic``
    rather than from a hand-written haversine formula.
    """
    origin = (lat1, lon1)
    destination = (lat2, lon2)
    separation = geodesic(origin, destination)
    return separation.meters

def sample_data(data, distance_threshold=5):
    """Thin out consecutive measurements so that kept points are spaced apart.

    Rows are visited in order. The first row is always kept; a later row is
    kept only when ``haversine`` reports it to be at least
    ``distance_threshold`` away from the most recently kept row.

    Args:
        data: DataFrame with 'Lat' and 'Long' columns.
        distance_threshold: Minimum spacing between kept rows, in the unit
            returned by ``haversine``.

    Returns:
        A new DataFrame with the kept rows, their original index labels and
        column dtypes. An empty input yields an empty frame with the same
        columns.
    """
    # An empty table has no first row to anchor on; indexing it with
    # iloc[0] would raise IndexError.
    if len(data) == 0:
        return data.iloc[0:0].copy()

    lats = data['Lat'].values
    lons = data['Long'].values

    kept_positions = [0]
    anchor_lat, anchor_lon = lats[0], lons[0]
    for pos, (lat, lon) in enumerate(zip(lats[1:], lons[1:]), start=1):
        if haversine(anchor_lat, anchor_lon, lat, lon) >= distance_threshold:
            kept_positions.append(pos)
            # Later rows are measured against this newly kept point.
            anchor_lat, anchor_lon = lat, lon

    # Positional selection keeps every column's dtype, which rebuilding the
    # frame from per-row Series does not guarantee.
    return data.iloc[kept_positions].copy()

# Thin the raw data with a threshold of 5, write the result to
# 'sampled_data.csv' (without the index column), and print it.
sampled_data = sample_data(data, 5)
sampled_data.to_csv('sampled_data.csv', index=False)
print(sampled_data)

import pandas as pd

import numpy as np

from geopy.distance import geodesic

# Load the raw measurements. The code in this tutorial reads the 'Lat',
# 'Long', 'Pressure (hPa)' and 'Timestamp' columns of this table.

data = pd.read_csv('sample_baro.csv')

# loading the data and sampling it at equal distances.

def haversine(lat1, lon1, lat2, lon2):
    """Return the distance in meters between two (lat, lon) points.

    Despite the name, the distance is obtained from geopy's ``geodesic``
    rather than from a hand-written haversine formula.
    """
    return geodesic((lat1, lon1), (lat2, lon2)).meters

def sample_data(data, distance_threshold=5):
    """Thin out consecutive measurements so that kept points are spaced apart.

    Rows are visited in order. The first row is always kept; a later row is
    kept only when ``haversine`` reports it to be at least
    ``distance_threshold`` away from the most recently kept row.

    Args:
        data: DataFrame with 'Lat' and 'Long' columns.
        distance_threshold: Minimum spacing between kept rows, in the unit
            returned by ``haversine``.

    Returns:
        A new DataFrame with the kept rows, their original index labels and
        column dtypes. An empty input yields an empty frame with the same
        columns.
    """
    # An empty table has no first row to anchor on; indexing it with
    # iloc[0] would raise IndexError.
    if len(data) == 0:
        return data.iloc[0:0].copy()

    lats = data['Lat'].values
    lons = data['Long'].values

    kept_positions = [0]
    anchor_lat, anchor_lon = lats[0], lons[0]
    for pos, (lat, lon) in enumerate(zip(lats[1:], lons[1:]), start=1):
        if haversine(anchor_lat, anchor_lon, lat, lon) >= distance_threshold:
            kept_positions.append(pos)
            # Later rows are measured against this newly kept point.
            anchor_lat, anchor_lon = lat, lon

    # Positional selection keeps every column's dtype, which rebuilding the
    # frame from per-row Series does not guarantee.
    return data.iloc[kept_positions].copy()

# Thin the raw data with a threshold of 5, write the result to
# 'sampled_data.csv' (without the index column), and print it.
sampled_data = sample_data(data, 5)

sampled_data.to_csv('sampled_data.csv', index=False)

print(sampled_data)

Step 2: Creating a Map Showing the Measurements

The goal is to visualize the pressure measurements along the road segment on a map.

Code:

import folium
from scipy.interpolate import griddata

def create_map(data):
    """Draw one marker per measurement and write 'pressure_map.html'.

    Args:
        data: DataFrame with 'Lat', 'Long' and 'Pressure (hPa)' columns.
    """
    # Centre the view on the mean coordinate of the measurements.
    centre = [data['Lat'].mean(), data['Long'].mean()]
    pressure_map = folium.Map(location=centre, zoom_start=15)

    # One blue circle per row, carrying the pressure reading as its popup.
    for _, measurement in data.iterrows():
        popup_text = f"Pressure: {measurement['Pressure (hPa)']} hPa"
        marker = folium.CircleMarker(
            location=[measurement['Lat'], measurement['Long']],
            radius=5,
            popup=popup_text,
            color='blue',
            fill=True,
            fill_color='blue',
        )
        marker.add_to(pressure_map)

    pressure_map.save('pressure_map.html')

# Interpolate data for visualization purposes
def interpolate_data(data, num_points=100):
    """Interpolate pressure onto a regular grid over the data's bounding box.

    Args:
        data: DataFrame with 'Lat', 'Long' and 'Pressure (hPa)' columns.
        num_points: Number of grid nodes along each of the two axes.

    Returns:
        Tuple ``(grid_lat, grid_long, grid_pressure)`` of 2-D arrays. The
        first two hold the node coordinates; the third holds the linearly
        interpolated pressure at each node.
    """
    lat_column = data['Lat']
    long_column = data['Long']

    # A complex "step" tells np.mgrid how many nodes to generate between
    # the bounds instead of giving a stride.
    node_count = complex(num_points)
    grid_lat, grid_long = np.mgrid[
        lat_column.min():lat_column.max():node_count,
        long_column.min():long_column.max():node_count,
    ]

    sample_coords = data[['Lat', 'Long']].values
    sample_pressures = data['Pressure (hPa)'].values
    grid_pressure = griddata(
        sample_coords,
        sample_pressures,
        (grid_lat, grid_long),
        method='linear',
    )

    return grid_lat, grid_long, grid_pressure

# Create map with interpolated data
def create_interpolated_map(data):
    """Plot the interpolated pressure grid and write 'interpolated_pressure_map.html'.

    Calls ``interpolate_data`` on ``data`` and adds one small marker per
    grid node that has a pressure value; nodes whose value is NaN are
    skipped.

    Args:
        data: DataFrame with 'Lat', 'Long' and 'Pressure (hPa)' columns.
    """
    grid_lat, grid_long, grid_pressure = interpolate_data(data)

    # Centre the view on the mean coordinate of the measurements.
    map_center = [data['Lat'].mean(), data['Long'].mean()]
    my_map = folium.Map(location=map_center, zoom_start=15)

    # Visit every (row, column) node of the grid. The bounds come from the
    # array's shape, so a non-square grid is traversed correctly; taking
    # len() of the grid for both loops would only count rows.
    for i, j in np.ndindex(grid_pressure.shape):
        pressure = grid_pressure[i, j]
        if np.isnan(pressure):
            continue
        folium.CircleMarker(
            location=[grid_lat[i, j], grid_long[i, j]],
            radius=2,
            popup=f"Pressure: {pressure:.2f} hPa",
            color='blue',
            fill=True,
            fill_color='blue',
        ).add_to(my_map)

    my_map.save('interpolated_pressure_map.html')

# Build both maps from the thinned measurements; each call writes its own
# HTML file.
create_map(sampled_data)
create_interpolated_map(sampled_data)

import folium

from scipy.interpolate import griddata

def create_map(data):
    """Draw one marker per measurement and write 'pressure_map.html'.

    Args:
        data: DataFrame with 'Lat', 'Long' and 'Pressure (hPa)' columns.
    """
    # Create a map centered around the mean coordinate of the data
    map_center = [data['Lat'].mean(), data['Long'].mean()]
    my_map = folium.Map(location=map_center, zoom_start=15)

    # Add the data points to the map, each with its reading as the popup
    for _, row in data.iterrows():
        folium.CircleMarker(
            location=[row['Lat'], row['Long']],
            radius=5,
            popup=f"Pressure: {row['Pressure (hPa)']} hPa",
            color='blue',
            fill=True,
            fill_color='blue',
        ).add_to(my_map)

    # Save the map
    my_map.save('pressure_map.html')

# Interpolate data for visualization purposes

def interpolate_data(data, num_points=100):
    """Interpolate pressure onto a regular grid over the data's bounding box.

    Args:
        data: DataFrame with 'Lat', 'Long' and 'Pressure (hPa)' columns.
        num_points: Number of grid nodes along each of the two axes.

    Returns:
        Tuple ``(grid_lat, grid_long, grid_pressure)`` of 2-D arrays. The
        first two hold the node coordinates; the third holds the linearly
        interpolated pressure at each node.
    """
    lat_min, lat_max = data['Lat'].min(), data['Lat'].max()
    long_min, long_max = data['Long'].min(), data['Long'].max()

    # A complex "step" tells np.mgrid how many nodes to generate between
    # the bounds instead of giving a stride.
    grid_lat, grid_long = np.mgrid[
        lat_min:lat_max:complex(num_points),
        long_min:long_max:complex(num_points),
    ]
    points = data[['Lat', 'Long']].values
    values = data['Pressure (hPa)'].values

    grid_pressure = griddata(points, values, (grid_lat, grid_long), method='linear')

    return grid_lat, grid_long, grid_pressure

# Create map with interpolated data

def create_interpolated_map(data):
    """Plot the interpolated pressure grid and write 'interpolated_pressure_map.html'.

    Calls ``interpolate_data`` on ``data`` and adds one small marker per
    grid node that has a pressure value; nodes whose value is NaN are
    skipped.

    Args:
        data: DataFrame with 'Lat', 'Long' and 'Pressure (hPa)' columns.
    """
    grid_lat, grid_long, grid_pressure = interpolate_data(data)

    # Centre the view on the mean coordinate of the measurements.
    map_center = [data['Lat'].mean(), data['Long'].mean()]
    my_map = folium.Map(location=map_center, zoom_start=15)

    # Visit every (row, column) node of the grid. The bounds come from the
    # array's shape, so a non-square grid is traversed correctly; taking
    # len() of the grid for both loops would only count rows.
    for i, j in np.ndindex(grid_pressure.shape):
        pressure = grid_pressure[i, j]
        if np.isnan(pressure):
            continue
        folium.CircleMarker(
            location=[grid_lat[i, j], grid_long[i, j]],
            radius=2,
            popup=f"Pressure: {pressure:.2f} hPa",
            color='blue',
            fill=True,
            fill_color='blue',
        ).add_to(my_map)

    my_map.save('interpolated_pressure_map.html')

# Build both maps from the thinned measurements; each call writes its own
# HTML file.
create_map(sampled_data)

create_interpolated_map(sampled_data)

Step 3: Clustering Geo-Temporal Data

The goal is to perform clustering on the data considering geographical and temporal proximity.

Code:

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

def cluster_geo_temporal_data(data, eps=0.01, min_samples=2):
    """Cluster measurements by position and time with DBSCAN, then save and plot.

    Builds one feature row per measurement from 'Lat', 'Long' and the
    'Timestamp' (converted to an integer epoch value divided by 10**9),
    standardizes the features, and runs DBSCAN on them. The labels are
    stored in a new 'Cluster' column of ``data`` (the frame is modified in
    place), the frame is written to 'clustered_data.csv', and a
    longitude/latitude plot is saved to 'clusters.png' and shown.

    Args:
        data: DataFrame with 'Lat', 'Long' and 'Timestamp' columns.
        eps: Passed to DBSCAN; it applies to the standardized features.
        min_samples: Passed to DBSCAN.
    """
    # Assemble the (lat, long, time) feature matrix.
    positions = data[['Lat', 'Long']].values
    epoch_values = pd.to_datetime(data['Timestamp']).astype('int64') / 10**9
    feature_matrix = np.column_stack((positions, epoch_values.values))

    # Put all three features on a comparable scale before clustering.
    scaled_features = StandardScaler().fit_transform(feature_matrix)

    model = DBSCAN(eps=eps, min_samples=min_samples)
    labels = model.fit(scaled_features).labels_

    # Record the assignment on the input frame and persist it.
    data['Cluster'] = labels
    data.to_csv('clustered_data.csv', index=False)

    # One series of points per distinct label.
    plt.figure(figsize=(10, 6))
    for cluster_id in set(labels):
        members = data[labels == cluster_id]
        plt.plot(
            members['Long'],
            members['Lat'],
            'o',
            markersize=5,
            label=f'Cluster {cluster_id}',
        )
    plt.title('DBSCAN Clustering')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.legend()
    plt.savefig('clusters.png')
    plt.show()

# Cluster the thinned measurements using the default eps and min_samples.
cluster_geo_temporal_data(sampled_data)

from sklearn.cluster import DBSCAN

from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

def cluster_geo_temporal_data(data, eps=0.01, min_samples=2):
    """Cluster measurements by position and time with DBSCAN, then save and plot.

    Builds one feature row per measurement from 'Lat', 'Long' and the
    'Timestamp' (converted to an integer epoch value divided by 10**9),
    standardizes the features, and runs DBSCAN on them. The labels are
    stored in a new 'Cluster' column of ``data`` (the frame is modified in
    place), the frame is written to 'clustered_data.csv', and a
    longitude/latitude plot is saved to 'clusters.png' and shown.

    Args:
        data: DataFrame with 'Lat', 'Long' and 'Timestamp' columns.
        eps: Passed to DBSCAN; it applies to the standardized features.
        min_samples: Passed to DBSCAN.
    """
    # Combine lat, long, and timestamp for clustering
    coords = data[['Lat', 'Long']].values
    timestamps = pd.to_datetime(data['Timestamp']).astype('int64') / 10**9
    features = np.hstack((coords, timestamps.values.reshape(-1, 1)))

    # Standardize features so all three are on a comparable scale
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    # Perform DBSCAN clustering
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(scaled_features)
    labels = db.labels_

    # Add labels to the data (modifies the caller's frame) and persist it
    data['Cluster'] = labels
    data.to_csv('clustered_data.csv', index=False)

    # Plot the clusters, one series of points per distinct label
    plt.figure(figsize=(10, 6))
    unique_labels = set(labels)
    for label in unique_labels:
        class_member_mask = (labels == label)
        xy = data[class_member_mask]
        plt.plot(xy['Long'], xy['Lat'], 'o', markersize=5, label=f'Cluster {label}')
    plt.title('DBSCAN Clustering')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.legend()
    plt.savefig('clusters.png')
    plt.show()

# Cluster the thinned measurements using the default eps and min_samples.
cluster_geo_temporal_data(sampled_data)

Summary

Data Sampling: The first script samples the data at equal distances to reduce duplication.
Mapping: The second set of scripts creates maps to visualize the measurements and interpolated data.
Clustering: The third script performs geo-temporal clustering on the data and visualizes the clusters.

Ensure you have the necessary libraries installed:

pip install pandas numpy geopy folium scipy scikit-learn matplotlib

pip install pandas numpy geopy folium scipy scikit-learn matplotlib

Each part of the script writes its own output file when it runs: sampled_data.csv (Step 1), pressure_map.html and interpolated_pressure_map.html (Step 2), and clustered_data.csv and clusters.png (Step 3).

Download the dataset used in this tutorial here.

Step 1: Sampling Data at Equal Distances

Code:

Step 2: Creating a Map Showing the Measurements

Code:

Step 3: Clustering Geo-Temporal Data

Code:

Summary

Related Posts

Leave a Reply | Cancel reply

Discover more from Spatial Dev Guru

Discover more from Spatial Dev Guru