4.8. Python skills check#

You should work through this before the tutorial. The idea is for you to find the relevant code snippets int he worked examples you have just read, and modify them to work for your requirements here

Oxford weather station data#

We will work with historical data from the Oxford weather station

Set up Python libraries#

As usual, run the code cell below to import the relevant Python libraries

# Set-up Python libraries - you need to run this but you don't need to change it
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import pandas 
import seaborn as sns
sns.set_theme()

Load and inspect the data#

Let’s load some historical data about the weather in Oxford, from the file “OxfordWeather.csv”

weather = pandas.read_csv("https://raw.githubusercontent.com/jillxoreilly/StatsCourseBook/main/data/OxfordWeather.csv")
display(weather)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[2], line 1
----> 1 weather = pandas.read_csv("https://raw.githubusercontent.com/jillxoreilly/StatsCourseBook/main/data/OxfordWeather.csv")
      2 display(weather)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:912, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
    899 kwds_defaults = _refine_defaults_read(
    900     dialect,
    901     delimiter,
   (...)
    908     dtype_backend=dtype_backend,
    909 )
    910 kwds.update(kwds_defaults)
--> 912 return _read(filepath_or_buffer, kwds)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:577, in _read(filepath_or_buffer, kwds)
    574 _validate_names(kwds.get("names", None))
    576 # Create the parser.
--> 577 parser = TextFileReader(filepath_or_buffer, **kwds)
    579 if chunksize or iterator:
    580     return parser

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1407, in TextFileReader.__init__(self, f, engine, **kwds)
   1404     self.options["has_index_names"] = kwds["has_index_names"]
   1406 self.handles: IOHandles | None = None
-> 1407 self._engine = self._make_engine(f, self.engine)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1661, in TextFileReader._make_engine(self, f, engine)
   1659     if "b" not in mode:
   1660         mode += "b"
-> 1661 self.handles = get_handle(
   1662     f,
   1663     mode,
   1664     encoding=self.options.get("encoding", None),
   1665     compression=self.options.get("compression", None),
   1666     memory_map=self.options.get("memory_map", False),
   1667     is_text=is_text,
   1668     errors=self.options.get("encoding_errors", "strict"),
   1669     storage_options=self.options.get("storage_options", None),
   1670 )
   1671 assert self.handles is not None
   1672 f = self.handles.handle

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/common.py:716, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    713     codecs.lookup_error(errors)
    715 # open URLs
--> 716 ioargs = _get_filepath_or_buffer(
    717     path_or_buf,
    718     encoding=encoding,
    719     compression=compression,
    720     mode=mode,
    721     storage_options=storage_options,
    722 )
    724 handle = ioargs.filepath_or_buffer
    725 handles: list[BaseBuffer]

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/common.py:373, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
    370         if content_encoding == "gzip":
    371             # Override compression based on Content-Encoding header
    372             compression = {"method": "gzip"}
--> 373         reader = BytesIO(req.read())
    374     return IOArgs(
    375         filepath_or_buffer=reader,
    376         encoding=encoding,
   (...)
    379         mode=fsspec_mode,
    380     )
    382 if is_fsspec_url(filepath_or_buffer):

File ~/opt/anaconda3/lib/python3.9/http/client.py:476, in HTTPResponse.read(self, amt)
    474 else:
    475     try:
--> 476         s = self._safe_read(self.length)
    477     except IncompleteRead:
    478         self._close_conn()

File ~/opt/anaconda3/lib/python3.9/http/client.py:626, in HTTPResponse._safe_read(self, amt)
    624 s = []
    625 while amt > 0:
--> 626     chunk = self.fp.read(min(amt, MAXAMOUNT))
    627     if not chunk:
    628         raise IncompleteRead(b''.join(s), amt)

File ~/opt/anaconda3/lib/python3.9/socket.py:704, in SocketIO.readinto(self, b)
    702 while True:
    703     try:
--> 704         return self._sock.recv_into(b)
    705     except timeout:
    706         self._timeout_occurred = True

File ~/opt/anaconda3/lib/python3.9/ssl.py:1275, in SSLSocket.recv_into(self, buffer, nbytes, flags)
   1271     if flags != 0:
   1272         raise ValueError(
   1273           "non-zero flags not allowed in calls to recv_into() on %s" %
   1274           self.__class__)
-> 1275     return self.read(nbytes, buffer)
   1276 else:
   1277     return super().recv_into(buffer, nbytes, flags)

File ~/opt/anaconda3/lib/python3.9/ssl.py:1133, in SSLSocket.read(self, len, buffer)
   1131 try:
   1132     if buffer is not None:
-> 1133         return self._sslobj.read(len, buffer)
   1134     else:
   1135         return self._sslobj.read(len)

KeyboardInterrupt: 

Correlation and covariance#

Create a plot to show the relationship between mean temp (Tmean) and rainfall

# your code here
<AxesSubplot:xlabel='Tmean', ylabel='Rainfall_mm'>
_images/e29272c33f4c351ee865d095d59371e9e7253c77b7e321e534aac1397bc7355b.png

If I want to measure the correlation between Tmean and rainfall, which method should I use?

Using the appropriate method, obtain the full correlation matrix for the weather dataframe

# your code here
YYYY MM DD Tmax Tmin Tmean Trange Rainfall_mm
YYYY 1.000000 -0.003398 -0.000058 0.063688 0.083421 0.074960 -0.000478 0.055859
MM -0.003398 1.000000 0.010523 0.180221 0.237769 0.213777 -0.016633 0.025164
DD -0.000058 0.010523 1.000000 0.002354 0.003191 0.002587 -0.002329 -0.001199
Tmax 0.063688 0.180221 0.002354 1.000000 0.843880 0.967582 0.573153 -0.079592
Tmin 0.083421 0.237769 0.003191 0.843880 1.000000 0.950769 0.075645 0.061870
Tmean 0.074960 0.213777 0.002587 0.967582 0.950769 1.000000 0.362483 -0.019287
Trange -0.000478 -0.016633 -0.002329 0.573153 0.075645 0.362483 1.000000 -0.234269
Rainfall_mm 0.055859 0.025164 -0.001199 -0.079592 0.061870 -0.019287 -0.234269 1.000000

… and just the correlation between Tmean and rainfall

# your code here
-0.019287092574398337

Obtain the covariance between Tmin and Tmax

# your code here
28.59261194330108

The end!#