from datetime import datetime

import geopy as gp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
from scipy import stats
# Load the meteorite-landings dataset and print a first overview of it.
data = pd.read_csv('data/meteorite-landings.csv')
print(data.columns)
# A bare expression is only rendered by a notebook; print it so that a plain
# script run shows the preview as well.
print(data.head())
print("Data described: \n")
print(data.describe())
print('\n')
print("Data info: \n")
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
data.info()
print('\n')
print("Data types: \n")
print(data.dtypes)
# How often each value of the 'nametype' column occurs.
print(data.nametype.value_counts())
# Under nametype, 'valid' is used for most meteorites, while 'relict'
# is used for objects that were once meteorites but are now highly
# altered by weathering on Earth.
# Give the verbose source columns the short names that the later
# statements of this script refer to ('class', 'lat', 'long', 'mass').
column_aliases = {
    'recclass': 'class',
    'reclat': 'lat',
    'reclong': 'long',
    'mass (g)': 'mass',
}
data.rename(columns=column_aliases, inplace=True)
# Notebook-style preview: the result is discarded when run as a plain script.
data.head()
# *****************************
# HIGHLY IMPORTANT
# *****************************
# Sample data: from here on `data` is a 10% random sample of the full frame,
# so every later statistic and figure describes the sample only.
print("Original Data Stats: \n")
print(data.describe())
print('\n--------\n')
print("New Sample Data Stats: \n")
# fillna()/astype() return new Series and leave the frame untouched, so the
# results have to be assigned back for the conversion to take effect.
# Missing values become 0 and the int cast drops any fractional part.
data['year'] = data['year'].fillna(0).astype(int)
data['mass'] = data['mass'].fillna(0).astype(int)
# A fixed seed keeps the sample, and everything derived from it, reproducible
# from one run to the next.
data = data.sample(frac=0.1, random_state=42)  # 10% sample set
print(data.describe())
# Histogram of the 'fall' column.
data['fall'].hist(bins=3)
plt.show()
# Bar chart of the ten most frequent classes. value_counts() already sorts by
# descending frequency, so the first ten entries are the top ten.
# The counts are plotted once, through pandas: a separate
# plt.bar(top_10_class, height=1) call would use the counts as x positions
# with a constant height and clutter the same axes.
top_10_class = data['class'].value_counts().head(10)
top_10_class.plot(kind='bar')
plt.show()
# Reverse-geocode the first landing sites of the data set and collect the
# country reported for each of them in `lists`.
GEOCODE_ROW_LIMIT = 20
geolocator = Nominatim(user_agent="project_impact")
# Space the requests out; the public Nominatim service asks for at most one
# request per second.
reverse_geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1)
# Pick rows by position rather than by index label: once the frame has been
# sampled, the labels 0..19 are no longer guaranteed to exist. Rows without
# coordinates cannot be geocoded and are skipped.
site_coords = data[['lat', 'long']].dropna().head(GEOCODE_ROW_LIMIT)
lists = []
for lats, longs in site_coords.itertuples(index=False, name=None):
    location = reverse_geocode(gp.Point(lats, longs))
    # No match for the coordinates yields None; record the country as None
    # instead of failing on the missing result.
    address = location.raw.get('address', {}) if location is not None else {}
    lists.append(address.get('country'))
print(lists)
# Scatter plot of mass (y) against year (x) on a 16x8 inch figure.
year_values = data['year']
mass_values = data['mass']
fig, ax = plt.subplots(figsize=(16, 8))
ax.scatter(year_values, mass_values)
plt.show()
# NOTE(review): these limits are applied to the current axes only after
# plt.show(), so presumably they do not affect the scatter plot above; the
# +/-90 range looks like a latitude axis -- confirm the intended target.
axes = plt.gca()
axes.set_ylim(-90, 90)
# Count the rows whose latitude is positive, zero, or negative, print the
# three counts and show their proportions as a pie chart.
# Rows with a missing latitude match none of the comparisons and are not counted.
latitude = data['lat']
above_equator = int((latitude > 0).sum())
at_equator = int((latitude == 0).sum())
below_equator = int((latitude < 0).sum())
for caption, count in (
    ("Above Equator:", above_equator),
    ("At Equator:", at_equator),
    ("Below Equator:", below_equator),
):
    print(caption, count, '\n')
labels = ["Above", 'At', 'Below']
values = [above_equator, at_equator, below_equator]
plt.pie(values, labels=labels)
plt.show()