import pandas as pd
import geopandas as gpd
import numpy as np
import json
import folium
from folium.plugins import MarkerCluster
# --- Load the raw inputs -----------------------------------------------------
# Film permits: only the borough name and the (possibly multi-valued) zip code
# column are used below. `.copy()` makes the selection an independent frame so
# the later column assignments do not raise SettingWithCopyWarning.
film_permits = pd.read_csv('Dataset/nyc_film_permits.csv')
film_permits = film_permits[['Borough', 'ZipCode(s)']].copy()

# Borough boundaries: parse the GeoJSON and build a GeoDataFrame from its
# features.
with open('Dataset/nyc_borough_boundaries.geojson', encoding='utf-8') as f:
    borough_boundaries_data = json.load(f)
borough_boundaries = gpd.GeoDataFrame.from_features(borough_boundaries_data)

# Postal codes: read only the three needed columns and rename them to the
# names used by the rest of the script.
postal_codes = pd.read_csv('Dataset/us_postalcode.csv', usecols=['zip', 'lat', 'lng'])
postal_codes = postal_codes.rename(columns={'zip': 'PostalCode', 'lat': 'Latitude', 'lng': 'Longitude'})

# Handy when the borough merge drops rows: compare the two name sets.
# print(film_permits['Borough'].unique())
# print(borough_boundaries['boro_name'].unique())

# --- Normalise the zip codes -------------------------------------------------
# A permit may list several comma-separated zip codes. Cast to str (missing
# values become the literal 'nan'), split on commas, expand to one row per
# zip code, and strip surrounding whitespace.
zip_code_lists = film_permits['ZipCode(s)'].astype(str).str.split(',')
film_permits = film_permits.assign(**{'ZipCode(s)': zip_code_lists}).explode('ZipCode(s)')
film_permits['ZipCode(s)'] = film_permits['ZipCode(s)'].str.strip()

# Keep only well-formed five-digit zip codes. This drops 'nan', empty strings
# and any non-numeric token, so the integer conversion below cannot fail.
is_five_digit_zip = film_permits['ZipCode(s)'].str.fullmatch(r'[0-9]{5}')
film_permits = film_permits[is_five_digit_zip].copy()
film_permits['ZipCode(s)'] = film_permits['ZipCode(s)'].astype(int)

# --- Restrict the postal-code lookup to zip codes that occur in the permits --
postal_codes = postal_codes[postal_codes['PostalCode'].isin(film_permits['ZipCode(s)'].unique())]
postal_codes = postal_codes[['PostalCode', 'Latitude', 'Longitude']]
# Notebook output (appears to be the first rows of `postal_codes` after filtering):
#       | PostalCode | Latitude | Longitude |
#  2583 | 10001      | 40.75064 | -73.99728 |
#  2584 | 10002      | 40.71597 | -73.98692 |
#  2585 | 10003      | 40.73184 | -73.98915 |
# Join the lookups onto the permits.
# Step 1: attach Latitude/Longitude by matching each permit's zip code to the
# postal-code table (inner join: permits without a matching zip are dropped).
film_permits = pd.merge(
    film_permits,
    postal_codes,
    how='inner',
    left_on='ZipCode(s)',
    right_on='PostalCode',
)
# Step 2: attach the borough boundary columns by matching on borough name
# (inner join again: permits whose borough has no boundary row are dropped).
film_permits = pd.merge(
    film_permits,
    borough_boundaries,
    how='inner',
    left_on='Borough',
    right_on='boro_name',
)
# Notebook output (appears to be the first rows of the merged `film_permits`):
#   | Borough  | ZipCode(s) | PostalCode | Latitude | Longitude | geometry                                          | boro_code | boro_name | shape_area    | shape_leng   |
# 0 | Brooklyn | 11222      | 11222      | 40.72862 | -73.94764 | MULTIPOLYGON (((-73.86327 40.58388, -73.86381 ... | 3         | Brooklyn  | 1934138258.43 | 728148.53241 |
# 1 | Brooklyn | 11222      | 11222      | 40.72862 | -73.94764 | MULTIPOLYGON (((-73.86327 40.58388, -73.86381 ... | 3         | Brooklyn  | 1934138258.43 | 728148.53241 |
import matplotlib.pyplot as plt
# Extract the longitude and latitude values (one entry per permit row)
longitude_values = film_permits['Longitude']
latitude_values = film_permits['Latitude']
# Bin the points into a 100 x 100 grid; heatmap[i, j] counts the rows whose
# longitude falls in x-bin i and latitude in y-bin j.
heatmap, xedges, yedges = np.histogram2d(longitude_values, latitude_values, bins=[100, 100])
# Plot the heatmap. The array is transposed so longitude runs along the x axis,
# and `extent` maps the image onto the bin edges so the ticks show real
# coordinates rather than bin indices.
plt.imshow(
    heatmap.T,
    origin='lower',
    extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Initialize the map
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)  # coordinates for NYC
# Add film permit locations to the map.
# Coordinates come from the zip-code lookup, so every permit in the same zip
# code shares one point. Draw each distinct point once: the map looks the same
# as with one marker per row but carries far fewer markers.
unique_locations = film_permits[['Latitude', 'Longitude']].drop_duplicates()
for latitude, longitude in unique_locations.itertuples(index=False):
    folium.CircleMarker([latitude, longitude], radius=1).add_to(m)
# Display the map