Raw Analysis International Meteor Organization

1999-2020



Imports

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats
import plotly.express as px
from geopy.geocoders import Nominatim
import geopy as gp
from datetime import datetime
from glob import glob

Data

In [4]:
filenames = glob('/Users/aleia/dev/Impacts /Project-Impact-2020/data/IMO/data/*.csv')
imo9920 = pd.concat([pd.read_csv(f,  sep=';') for f in filenames], ignore_index=True)

Preprocessing

In [7]:
imo9920.to_csv('IMO9920.csv', index = False)

EDA

In [5]:
print(imo9920.shape, '\n')
print(imo9920.dtypes, '\n')
print(imo9920.describe, '\n')
print (type(imo9920), '\n')
print(imo9920.columns)
(42989, 11) 

Session ID                int64
Start Date               object
Observer ID             float64
Submitter ID              int64
Actual Observer Name     object
Submitted by             object
City                     object
Country                  object
Latitude                float64
Longitude               float64
Elevation               float64
dtype: object 

<bound method NDFrame.describe of        Session ID           Start Date  Observer ID  Submitter ID  \
0             162  2008-12-19 09:08:00        283.0           283   
1             163  2008-12-22 05:20:00        283.0           283   
2             164  2008-12-31 10:47:00        283.0           283   
3             311  2008-08-11 21:50:00        630.0           630   
4             918  2008-08-11 23:00:00        710.0           710   
...           ...                  ...          ...           ...   
42984       75686  2005-01-10 13:15:00       7956.0          7956   
42985       75687  2005-01-13 13:00:00       7956.0          7956   
42986       75691  2005-07-29 11:10:00       7956.0          7956   
42987       75692  2005-07-30 14:55:00       7956.0          7956   
42988       78769  2005-03-09 19:37:00       1651.0          1651   

      Actual Observer Name      Submitted by                  City    Country  \
0         Salvador Aguirre  Salvador Aguirre  Ej. El Carmen, Sonor     Mexico   
1         Salvador Aguirre  Salvador Aguirre  Ej. El Carmen, Sonor     Mexico   
2         Salvador Aguirre  Salvador Aguirre  Ej. El Carmen, Sonor     Mexico   
3          Reyhane Akhbari   Reyhane Akhbari           Hesar Sorkh       Iran   
4           Igor Arolovich    Igor Arolovich         Mount Tayasim     Israel   
...                    ...               ...                   ...        ...   
42984           Adam Marsh        Adam Marsh    Pakenham, Victoria  Australia   
42985           Adam Marsh        Adam Marsh    Pakenham, Victoria  Australia   
42986           Adam Marsh        Adam Marsh            Ladys Pass  Australia   
42987           Adam Marsh        Adam Marsh            Ladys Pass  Australia   
42988       Jaroslaw Dygos    Jaroslaw Dygos       Czernice Borowe     Poland   

        Latitude   Longitude  Elevation  
0      29.319444 -110.822778        0.0  
1      29.319444 -110.822778        0.0  
2      29.319444 -110.822778        0.0  
3      36.005556   58.321111        0.0  
4      31.766667   35.083333        0.0  
...          ...         ...        ...  
42984 -38.000000  145.000000      100.0  
42985 -38.000000  145.000000      100.0  
42986 -36.816110  144.680991      233.0  
42987 -36.816110  144.680991      233.0  
42988  53.033333   20.716667      150.0  

[42989 rows x 11 columns]> 

<class 'pandas.core.frame.DataFrame'> 

Index(['Session ID', 'Start Date', 'Observer ID', 'Submitter ID',
       'Actual Observer Name', 'Submitted by', 'City', 'Country', 'Latitude',
       'Longitude', 'Elevation'],
      dtype='object')
In [24]:
sns.heatmap(imo9920.corr())
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x110961790>
In [ ]: