import pandas as pd
import geopandas as gpd
import numpy as np
import json
import folium
from folium.plugins import MarkerCluster


# Load the film permits data
film_permits = pd.read_csv('Dataset/nyc_film_permits.csv')

# Load the borough boundaries data
with open('Dataset/nyc_borough_boundaries.geojson') as f:
    borough_boundaries_data = json.load(f)
borough_boundaries = gpd.GeoDataFrame.from_features(borough_boundaries_data)

# Load the postal codes data
postal_codes = pd.read_csv('Dataset/us_postalcode.csv', usecols=['zip', 'lat', 'lng'])

# Rename the columns to match the column names used before
postal_codes = postal_codes.rename(columns={'zip': 'PostalCode', 'lat': 'Latitude', 'lng': 'Longitude'})

postal_codes.head()

# print(film_permits['Borough'].unique())
# print(borough_boundaries['boro_name'].unique())
print(film_permits['ZipCode(s)'].unique())
print(postal_codes['PostalCode'].unique())

['11222' '11237, 11385' '11206' ... '11356, 11357, 11420'
 '10002, 10012, 11231' '10002, 10017, 10036, 10173']
[  601   602   603 ... 99926 99927 99929]


# Keep only necessary columns
film_permits = film_permits[['Borough', 'ZipCode(s)']]

# Convert 'ZipCode(s)' to string type
film_permits['ZipCode(s)'] = film_permits['ZipCode(s)'].astype(str)

# Handle rows with multiple zip codes (split on comma, strip whitespace, and expand into separate rows)
film_permits = film_permits.assign(**{'ZipCode(s)': film_permits['ZipCode(s)'].str.split(',').apply(lambda x: [i.strip() for i in x])}).explode('ZipCode(s)')
film_permits = film_permits[film_permits['ZipCode(s)'].str.len() == 5]
# # Convert 'nan' strings to actual NaN values
# film_permits['ZipCode(s)'] = film_permits['ZipCode(s)'].replace('nan', np.nan)
# # Drop rows with 'nan' or empty values in 'ZipCode(s)'
# film_permits = film_permits[film_permits['ZipCode(s)'].notna()]
film_permits['ZipCode(s)'] = film_permits['ZipCode(s)'].astype(int)
print(film_permits['ZipCode(s)'].unique())

[11222 11237 11385 11206 10007 10013 10282 10014 11105 10312 10012 10011
 10002 10016 10018 10038 10029 10019 10020 11378 11223 10463 10473 10028
 10128 11101 11109 11211 10003 10024 10023 11201 11207 10022 10152 11217
 11218 10112 10001 10121 10031 10032 10025 10026 11226 11379 10036 10119
 10457 10458 10471 11205 10027 10035 11215 10106 10010 10017 11231 10021
 10065 11106 10030 10004 10006 11203 11104 11377 11224 11375 10462 11365
 11366 10075 11249 11368 11373 10454 11220 11232 11357 11359 11430 11434
 11436 10466 11418 10309 11356 10044 10105 10173 11216 10005 11367 11374
 11102 11230 11372 11432 11213 10278 11234 11209 11361    83 10470 11225
 10033 11221 11208 11239 11362 11363 10045 11251 10171 11236 11238 10305
 10308 10009 11235 10118 10165 10103 11435 11103 11354 11415 11419 11420
 10041 11229 10474 10452 10037 10467 10170 10451 11214 11228 10465 10301
 11233 10306 10281 11691 10461 10034 11416 11212 11210 10456 10304 10115
 10040 10455 10459 10280 10111 11355 10472 10310 11693 11694 10162 10279
 10154 10158 11421 11358 11364 11427 10172 10271 10464 10069 11692 10303
 10155 10167 10177 10048 10039 10153 11417 10097 10460 11433 10302 11360
 11204 11414 10110 10175 10176 11004 11040 10314 10120 11038 11370 11219
 10453 10468 10307 11695 11369 11371 10000 10469 11697 10475 11423]


# Keep only rows with postal codes in film permits data
postal_codes = postal_codes[postal_codes['PostalCode'].isin(film_permits['ZipCode(s)'].unique())]

# Keep only necessary columns
postal_codes = postal_codes[['PostalCode', 'Latitude', 'Longitude']]
postal_codes.head(3)


# Merge data
film_permits = film_permits.merge(postal_codes, left_on='ZipCode(s)', right_on='PostalCode')
film_permits = film_permits.merge(borough_boundaries, left_on='Borough', right_on='boro_name')
film_permits.head(2)


import matplotlib.pyplot as plt

# Extract the longitude and latitude values
longitude_values = film_permits['Longitude']
latitude_values = film_permits['Latitude']

# Define the size and the bins for the histogram
heatmap, xedges, yedges = np.histogram2d(longitude_values, latitude_values, bins=[100,100])

# Plot the heatmap
plt.imshow(heatmap.T, origin='lower')
plt.show()


# Initialize the map
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)  # coordinates for NYC

# Add film permits locations to the map
for idx, row in film_permits.iterrows():
    folium.CircleMarker([row['Latitude'], row['Longitude']], radius=1).add_to(m)

# Display the map
m


# Aggregate film permits by borough
borough_counts = film_permits['Borough'].value_counts().reset_index()
borough_counts.columns = ['Borough', 'Count']

# Load GeoJSON file
with open('Dataset/nyc_borough_boundaries.geojson') as f:
    nyc_geo = json.load(f)

# Normalize borough names in GeoJSON file (they are in uppercase)
for i in range(len(nyc_geo['features'])):
    nyc_geo['features'][i]['properties']['boro_name'] = nyc_geo['features'][i]['properties']['boro_name'].title()

# Create map
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)  # coordinates for NYC

# Add Choropleth layer
choropleth = folium.Choropleth(
    geo_data=nyc_geo,
    name='choropleth',
    data=borough_counts,
    columns=['Borough', 'Count'],
    key_on='feature.properties.boro_name',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Film Permits Count'
).add_to(m)

# Add labels
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['boro_name'], labels=False)
)

m


from folium.plugins import HeatMap

# Define the initial map
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11, title="Filming spots Heatmap")  # coordinates for NYC

# List comprehension to make out list of lists
heat_data = [[row['Latitude'], row['Longitude']] for index, row in film_permits.iterrows()]

# Add the heatmap to the map
HeatMap(heat_data,radius=20, max_zoom=13, blur=10).add_to(m)

# Display the map
m


with open("Dataset/Points_Of_Interest.geojson", "r") as file:
    data = json.load(file)
coords = [[feature["geometry"]["coordinates"][1], feature["geometry"]["coordinates"][0]] for feature in data["features"]]

# Creating the base map
m = folium.Map(location=[40.730610, -73.935242], zoom_start=12,title="Points of interests Heatmap")  # Centering map on NYC

# Adding the heatmap to the map
HeatMap(coords, radius=5, max_zoom=14, blur=5).add_to(m)

# Display the map
m


# Load the data from the geojson file
with open("Dataset/Bi_Annual_Pedestrian_Counts.geojson", "r") as file:
    data = json.load(file)

# Extract coordinates and average pedestrian count for each location
locations = []
for feature in data["features"]:
    coords = feature["geometry"]["coordinates"]
    
    # Calculate the average pedestrian count
    # Filter out properties that aren't counts (like street names, etc.) and convert to integers
    counts = [int(value) for key, value in feature["properties"].items() if isinstance(value, str) and value.isdigit()]
    
    if counts:  # check if counts list is not empty
        avg_count = np.mean(counts)
        locations.append([coords[1], coords[0], avg_count])  # [latitude, longitude, avg_count]

# Create the base map
m = folium.Map(location=[40.730610, -73.935242], zoom_start=12)  # Centering map on NYC


# Add the heatmap
HeatMap(locations, radius=15, max_zoom=13,blur=10).add_to(m)

# Display the map
m

	PostalCode	Latitude	Longitude
2583	10001	40.75064	-73.99728
2584	10002	40.71597	-73.98692
2585	10003	40.73184	-73.98915

	Borough	ZipCode(s)	PostalCode	Latitude	Longitude	geometry	boro_code	boro_name	shape_area	shape_leng
0	Brooklyn	11222	11222	40.72862	-73.94764	MULTIPOLYGON (((-73.86327 40.58388, -73.86381 ...	3	Brooklyn	1934138258.43	728148.53241
1	Brooklyn	11222	11222	40.72862	-73.94764	MULTIPOLYGON (((-73.86327 40.58388, -73.86381 ...	3	Brooklyn	1934138258.43	728148.53241