import numpy as np
import scipy
import matplotlib
import sklearn
import pandas as pd

# (package name, version string) pairs for the libraries imported above
versions = (
    ('numpy', np.__version__),
    ('scipy', scipy.__version__),
    ('matplotlib', matplotlib.__version__),
    ('sklearn', sklearn.__version__),
    ('pandas', pd.__version__),
)

# str.join() is very useful when you concatenate a sequence of words with intervening separators.
# The result is stored in `version_report` (not `str`) so that the built-in
# `str` type is not shadowed by a module-level variable.
version_report = '\n'.join("{0}.version={1}".format(pkgname, ver)
                           for (pkgname, ver) in versions)
print(version_report)

numpy.version=1.18.1
scipy.version=1.4.1
matplotlib.version=3.1.3
sklearn.version=0.21.2
pandas.version=0.25.0


# printing comma separated items
print("Hello CSCI", 213*20)

# better syntax for output formatting
print("Hello {0} {1}".format("CSCI", 213*20))
print("Hello {1} {0}".format("CSCI", 213*20))

# you can also use keyword arguments
print("Hello {deptid} {course_num}".format(deptid="CSCI", course_num=4260))

# positional arguments
print("Hello {} {}".format("CSCI", 4260))


# You can tell the python interpreter to use a specific data type.
# For more about format string syntax, see https://docs.python.org/3.4/library/string.html#formatspec

print("Hello {0:s} {1:d}".format("CSCI", 4260))
# print up to 2 digits after the decimal point
print("Hello {0:s} {1:.2f}".format("CSCI", 4260))
# 10 digit integer (right aligned by default)
print("Hello {0:s} {1:10d}".format("CSCI", 4260))
# to align left
print("Hello {0:s} {1:<10d}".format("CSCI", 4260))

Hello CSCI 4260
Hello CSCI 4260
Hello 4260 CSCI
Hello CSCI 4260
Hello CSCI 4260
Hello CSCI 4260
Hello CSCI 4260.00
Hello CSCI       4260
Hello CSCI 4260


x = 2
y = 2.0
print("type of x = {},\ttype of y = {}".format(type(x), type(y)))
print("x + 1 = {},\tx -1 = {},\tx * 2 = {},\tx / 4 = {},".format(x+1, x-1, x*2, x/4))

type of x = <class 'int'>,	type of y = <class 'float'>
x + 1 = 3,	x -1 = 1,	x * 2 = 4,	x / 4 = 0.5,


print("before the type casting: type(x)=", type(x))
x = float(x)  # explicitly casting into float type
print("after casting int float: type(x)=", type(x))

before the type casting: type(x)= <class 'int'>
after casting int float: type(x)= <class 'float'>


mytrue, myfalse = True, False

print("True and False = {}".format(mytrue and myfalse))
print("True or False = {}".format(mytrue or myfalse))
print("not True = {}".format(not mytrue))
print("True xor False = {}".format(mytrue != myfalse))
print("True * 10 = {}".format(mytrue*10))  # True is treated as a value of 1
print("False * 10 = {}".format(myfalse*10))
print("Condition x == y = {}".format(x==y))
print("Condition 1 == 2 = {}".format(1==2))

True and False = False
True or False = True
not True = False
True xor False = True
True * 10 = 10
False * 10 = 0
Condition x == y = True
Condition 1 == 2 = False


my_string = "CSCI 4260 is cool!"
# concatenation
print("my_string + 'Yeah!' = {}".format(my_string + ' Yeah!'))
# indexing
print("You can access and extract substrings using indices: ", my_string[0:4])
print("The second to the last character is ", my_string[-2])
# length of a string
print("The length of my_string is {}.".format(len(my_string)))
print("Print the string character-by-character: ")
for c in my_string:
    print(c)
    
print("Does my_string contains the word 'CSCI'? ", 'CSCI' in my_string)
print("Does my_string contains the word 'MATH'? ", 'MATH' in my_string)
    
# this will cause an error because strings are immutable (meaning not changeable)
my_string[-1] = '?'

my_string + 'Yeah!' = CSCI 4260 is cool! Yeah!
You can access and extract substrings using indices:  CSCI
The second to the last character is  l
The length of my_string is 18.
Print the string character-by-character: 
C
S
C
I
 
4
2
6
0
 
i
s
 
c
o
o
l
!
Does my_string contains the word 'CSCI'?  True
Does my_string contains the word 'MATH'?  False

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-03227e613496> in <module>
     15 
     16 # this will cause an error because strings are immutable (meaning not changeable)
---> 17 my_string[-1] = '?'

TypeError: 'str' object does not support item assignment


# creating a list
mylist = [1, 2, 3, 'red', 'blue', 'green', 112*30]
print(mylist)

# add an item at the end of the list
mylist.append(1)
print(mylist)

# access each item with a (zero-starting) index
print("First item in my list is {0}".format(mylist[0]))
print("The item at index {0} is {1}".format(5, mylist[5]))
# A list can contain another list as an element.
mylist.append([1, 2, 3])
print(mylist)

# pop(i) removes and returns the element at index i (here i = 0, i.e. the first item)
last_item = mylist.pop(0)
print(mylist)

[1, 2, 3, 'red', 'blue', 'green', 3360]
[1, 2, 3, 'red', 'blue', 'green', 3360, 1]
First item in my list is 1
The item at index 5 is green
[1, 2, 3, 'red', 'blue', 'green', 3360, 1, [1, 2, 3]]
[2, 3, 'red', 'blue', 'green', 3360, 1, [1, 2, 3]]


n = 10
x = range(n)  # this generates (nonnegative) integers smaller than n
print(x)

print(x[1:3])  # the default value for stride is 1
print(x[5:])   # print all the elements whose index is greater than or equal to 5
print(x[:5])  # print all the elements whose index is smaller than 5
print(x[::2])  # print every other element
print(x[::3])  # every 3rd element

print(x[-1])  # use a negative index to access elements from the back
print(x[5:-1])
print(x[-5:-1:2])  # what will be the output?

# to reverse a list, you can do 
print(x[::-1])

range(0, 10)
range(1, 3)
range(5, 10)
range(0, 5)
range(0, 10, 2)
range(0, 10, 3)
9
range(5, 9)
range(5, 9, 2)
range(9, -1, -1)


# your code goes here (you only need one line of code!)


alphabet = ['a', 'b', 'c', 'd', 'e', 'f']

for letter in alphabet:
    print(letter)
    
print("\nenumerate() returns tuples, where each tuple contains an index and an item")
# if you need their indices as well
for i, letter in enumerate(alphabet):
    print(i, letter)

a
b
c
d
e
f

enumerate() returns tuples, where each tuple contains an index and an item
0 a
1 b
2 c
3 d
4 e
5 f


# the syntax is {key1: value1, key2: value2, ...}
d = {'red': '0xFF0000', 'green': '0x00FF00', 'blue': '0x0000FF'}  # useful for storing mappings

print("red -> {}".format(d['red']))
print("Is pink in d? {}".format('pink' in d))
print("Is green in d? {}".format('green' in d))

# if you try to access an item not in the dictionary, it throws an error (KeyError)
# uncomment the following line and try it yourself.
# print("pink -> {}".format(d['pink']))

# you can avoid this error by specifying a default value
colors = ['red', 'pink', 'green', 'yellow', 'blue']

for color in colors:
    print('{} -> {}'.format(color, d.get(color, 'unknown')))

red -> 0xFF0000
Is pink in d? False
Is green in d? True
red -> 0xFF0000
pink -> unknown
green -> 0x00FF00
yellow -> unknown
blue -> 0x0000FF


d['skyblue'] = '0x00FAFF'  # adding a new entry
d['red'] = '0xFFFFFF'      # modifying an existing entry
print(d)

{'red': '0xFFFFFF', 'green': '0x00FF00', 'blue': '0x0000FF', 'skyblue': '0x00FAFF'}


for color in d:
    print('color {}, code {}'.format(color, d[color]))

color red, code 0xFFFFFF
color green, code 0x00FF00
color blue, code 0x0000FF
color skyblue, code 0x00FAFF


s = set()  # an empty set
print("An empty set: ", s)

# add an element
s.add(1)
s.add(2)
print("S after adding 2 elements: ", s)

s1 = set(range(2, 10, 2))  # creating a set from a range
print("S1: ", s1)

s2 = set([2, 3, 5, 7])  # set of prime numbers
print("S2: ", s2)

s3 = set([1, 1, 1, 1, 1])  # generate from a list with duplicates
print("S3: ", s3)

An empty set:  set()
S after adding 2 elements:  {1, 2}
S1:  {8, 2, 4, 6}
S2:  {2, 3, 5, 7}
S3:  {1}


# union
print(s1.union(s2))  # {2, 4, 6, 8} U {2, 3, 5, 7}

# intersection
print(s1.intersection(s2))  # {2, 4, 6, 8} /\ {2, 3, 5, 7}

# set difference
print(s2.difference(s1))  # {2, 3, 5, 7} \ {2, 4, 6, 8}

{2, 3, 4, 5, 6, 7, 8}
{2}
{3, 5, 7}


def fibonacci(n):
    """
    Return the Fibonacci numbers F(0), F(1), ..., F(n) as a list.

    Parameters:
    ------------
    n : integer, index of the last Fibonacci number to generate
        (must be non-negative)

    Output:
    F : list of n + 1 integers, [F(0), F(1), ..., F(n)]

    Raises:
    ValueError : if n is negative
    """
    # it's good habit to validate input parameters; raise instead of printing
    # so that an invalid n cannot silently produce a sequence
    if n < 0:
        raise ValueError("n must be a non-negative number!")

    F = [0]  # container for the sequence, starting with F(0) = 0

    if n == 0:
        return F

    F.append(1)  # F(1) = 1

    # every further term is the sum of the two most recent terms
    for _ in range(n - 1):
        F.append(F[-2] + F[-1])

    return F


fib10 = fibonacci(10)
print(fib10)

[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]


import numpy as np

arr1 = np.empty(10)  # create an empty 1d array of length 10
print(arr1)          # it's created but not initialized
# you should always initialize them before accessing them

# create and initialize each entry with zero values
arr2 = np.zeros((2, 2)) # 2d array of shape (2, 2)
print("\n2D array with shape {}".format(arr2.shape))
print(arr2)

# create and initialize with ones
arr3 = np.ones((3, 3, 3)) 
print(arr3)

# creating from an existing container
mylist = [1, 2, 3, 4, 5, 6]
myarray = np.array(mylist)
print(myarray)

[3.31023983e-322 3.55727265e-322 4.66839074e-313 0.00000000e+000
 0.00000000e+000 0.00000000e+000 6.89772896e-307 1.11261162e-306
 8.34443015e-308 3.91792476e-317]

2D array with shape (2, 2)
[[0. 0.]
 [0. 0.]]
[[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]
[1 2 3 4 5 6]


# you can also use the slicing operator
print(myarray[3:])
print(myarray[-1])

[4 5 6]
6


# boolean indexing

print(myarray)

# if you want to access only the element greater than 2, you can express it as
print(myarray > 2)

# the result is a vector of booleans, and we can use this boolean array as indices
print(myarray[myarray > 2])

# modifying only entries satisfying the condition
myarray[myarray > 2] =0
print(myarray)

[1 2 3 4 5 6]
[False False  True  True  True  True]
[3 4 5 6]
[1 2 0 0 0 0]


x = np.array([[1, 2], [3, 4]])
print("x has the shape", x.shape)

y = np.zeros_like(x)
print("y must have the same shape with x,", y.shape)

x has the shape (2, 2)
y must have the same shape with x, (2, 2)


arr = np.array([1, 2, 3, 4, 5])  # didn't specify a type
print("guessed data type =", arr.dtype)

# this can be problematic if you want to do arithmetic operations on them
arr = arr / 2   # divide each element by 2 (what will be the result?)
print(arr.dtype)  # notice the type has been changed

# You may expect that x = x / 2 and x /= 2 are the same, but the following
# code will raise an error. Uncomment the next three lines and execute them.
# arr = np.array([1, 2, 3, 4, 5])
# arr /= 2
# print(arr)

# repeat the above but this time with type specified
arr = np.array([1, 2, 3, 4, 5], dtype=np.float32)
arr /= 2  # divide by 2
print(arr)

guessed data type = int32
float64
[0.5 1.  1.5 2.  2.5]


X = np.random.randn(2, 2)
Y = np.random.randn(2, 2)
print(X)
print(Y)
print("X + Y = \n", X + Y)  # element-wise addition

[[-1.04235898 -0.67119513]
 [ 0.64032331 -1.44064146]]
[[-1.61356232  0.1396451 ]
 [-0.59449651 -1.04062881]]
X + Y = 
 [[-2.6559213  -0.53155003]
 [ 0.0458268  -2.48127027]]


X = np.array([[1, 2, 3], [4, 5, 6]])
print(X)
Y = np.array([-1, 0, 1])

# What would be the result of X+Y?
X + Y

[[1 2 3]
 [4 5 6]]

array([[0, 2, 4],
       [3, 5, 7]])


print("X's shape is ", X.shape, " and Y's shape is ", Y.shape, ".")

X's shape is  (2, 3)  and Y's shape is  (3,) .


X = np.array([[1, 2, 3], [4, 5, 6]])
Y = np.array([1, 2])

# Can you guess the results of X+Y?


X + Y

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-51-e4a642f73c42> in <module>
----> 1 X + Y

ValueError: operands could not be broadcast together with shapes (2,3) (2,)


# python's list
a = range(1, 1001)
%timeit [i**2 for i in a]

378 µs ± 7.41 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# repeat the same using a numpy array
a = np.arange(1, 1001)
%timeit a**2

1.48 µs ± 14.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


# generate a random data following Gaussian distribution
x = np.random.randn(5, 5) 
print(x)
# to save a numpy array into a text file
np.savetxt('myarray.out', x, delimiter=',')

# load the data from a text file 
y = np.loadtxt('myarray.out', delimiter=',')
print(y)

[[-1.58490594  0.41165671  0.63350179 -0.3401466  -0.67823426]
 [ 1.21955582  0.17596789 -1.37760864 -0.09794624 -2.26611511]
 [ 2.33058364 -0.44889901  0.40046212 -0.73817591  0.09498042]
 [-0.09215983  0.93309669 -0.81667184  0.21208477  1.30212128]
 [-0.92334705 -0.81187776 -0.59147425  1.14260113 -0.54246467]]
[[-1.58490594  0.41165671  0.63350179 -0.3401466  -0.67823426]
 [ 1.21955582  0.17596789 -1.37760864 -0.09794624 -2.26611511]
 [ 2.33058364 -0.44889901  0.40046212 -0.73817591  0.09498042]
 [-0.09215983  0.93309669 -0.81667184  0.21208477  1.30212128]
 [-0.92334705 -0.81187776 -0.59147425  1.14260113 -0.54246467]]


# you need to include this line to have matplotlib display plots inline
%matplotlib inline
import matplotlib.pyplot as plt

# draw 10,000 random samples from the standard normal distribution
nsamples = 10000
nbins = 20
x = np.random.randn(nsamples)
# build a histogram (number of bins = 20)
bins, edges, patches = plt.hist(x, bins=nbins, facecolor='blue')

plt.xlabel('x')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()


def coin_flip(p):
    """
    This function simulates a biased coin flip. Generate a random number
    from uniform[0, 1), and return 1 if the number is less than p
    and 0 otherwise.

    Parameters
    ---------------
    p: float, probability of getting a head (must lie in [0, 1])

    Returns
    --------
    X: bernoulli random variable (either 0 or 1)

    Raises
    --------
    ValueError: if p is not a probability in [0, 1]
    """
    # it's good habit to validate input parameters
    if not 0 <= p <= 1:
        raise ValueError("p must be a probability in [0, 1]")

    # The draw lies in [0, 1), so the strict comparison never reports a head
    # for p = 0 and always reports one for p = 1.
    X = 1 if np.random.uniform() < p else 0

    return X


# we're going to throw this coin n times
p = 0.3
n = 10

experiment = np.array([coin_flip(p) for i in range(n)])

# how many heads did we have?
n_head = np.count_nonzero(experiment == 1)
print("probability of getting head = {:.2f}".format(float(n_head)/n))


sample_sizes = [10, 30, 50, 70, 100, 500, 1000, 5000, 10000, 20000, 50000]
estimates = []

# repeat the above procedure for different values of n
for n in sample_sizes:
    experiment = np.array([coin_flip(p) for i in range(n)])
    n_head = np.count_nonzero(experiment == 1)
    est_p = float(n_head) / n
    estimates.append(est_p)
    
# draw a plot showing how our estimate changes as we increase the sample size
plt.semilogx(sample_sizes, estimates, 'C0-', label='estimate')
plt.axhline(p, c='C1', ls='--', label='true p')
plt.xlabel('sample size (n)')
plt.ylabel('Probability of heads')
plt.legend(loc='best')


def n_coin_flips(n, p):
    """
    Flip the coin n times via coin_flip(p) and count the flips that came up heads.

    Parameters:
    ---------------------
    n: int, number of coin flips
    p: probability of getting head

    Returns
    -----------------
    n_head: int, number of heads
    """
    outcomes = [coin_flip(p) for _ in range(n)]

    # a flip counts as a head when coin_flip() reported 1
    return sum(1 for outcome in outcomes if outcome == 1)


# sample n_head t times
t = 1000
n = 10

# run n_coin_flips() t times
x = [n_coin_flips(n, p) for i in range(t)]

# compute mean and standard deviation of x
# for a list of statistical function in numpy, see
# https://docs.scipy.org/doc/numpy-1.13.0/reference/routines.statistics.html
x_mean = np.mean(x)  # arithmetic mean of the t sampled head counts
x_std = np.std(x)    # standard deviation with numpy's default ddof=0

# Do they match with analytical solution?
# FYI, when Y ~ binomial(n, p), its mean is np and variance is np(1-p).
print("sample_mean={:5.3f}, analytical mean={:5.3f}".format(x_mean, n*p))
print("sample std.={:.5f}, analytical std={:.5f}".format(x_std, np.sqrt(n*p*(1-p))))

# histogram of the sampled head counts with bins=8 and density=True
# (density=True normalizes the bars so that their total area is 1)
plt.hist(x, bins=8, density=True)

# you can add an analytical pmf using the following code
from scipy.stats import binom

y = np.arange(8)
pmf = binom.pmf(y, n, p)
plt.plot(y, pmf)

sample_mean=2.934, analytical mean=3.000
sample std.=1.47636, analytical std=1.44914

[<matplotlib.lines.Line2D at 0xeed9dd8>]


def square(x):
    """Return x raised to the power 2."""
    return pow(x, 2)

def func_A(x):
    """Return the sum of squares of the elements of x using an explicit index loop."""
    total = 0

    for idx in range(len(x)):
        item = x[idx]
        total += item * item

    return total

def func_B(x):
    """Return the sum of squares of x: square() each element, then add them with sum()."""
    return sum(square(item) for item in x)

def func_C(x):
    """Return the sum of squares of x, computed entirely with numpy routines."""
    squared = np.square(x)  # element-wise squares
    return np.sum(squared)

# create a numpy array of length 50 (the integers 1 through 50)
x = np.arange(1, 51)

print("func_A(x)=", func_A(x))
print("func_B(x)=", func_B(x))
print("func_C(x)=", func_C(x))

# now check which function is the fastest
%timeit func_A(x)
%timeit func_B(x)
%timeit func_C(x)

func_A(x)= 42925
func_B(x)= 42925
func_C(x)= 42925
100000 loops, best of 3: 12.8 µs per loop
100000 loops, best of 3: 15.9 µs per loop
The slowest run took 9.84 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.57 µs per loop

Function	Description
np.random.randn()	draw samples from a standard normal
np.random.rand()	draw samples from a uniform[0,1) distribution
np.random.binomial()	draw samples from a binomial distribution
np.random.uniform()	draw samples from a uniform[low,high) distribution (default: [0,1))
np.random.gamma()	draw samples from a gamma distribution
np.random.shuffle()	randomly permute a sequence in place
np.random.choice()	generate a random sample from an array

PPML Summer School: Python Overview¶

Installing Python¶

Installing Anaconda¶

(Optional) If you already have installed Python or Anaconda¶

Jupyter Notebook¶

Notebook mode¶

Keyboard shortcuts¶

Q1. Report the versions of packages on your system (5 pts).¶

Basics of Python¶

Hello World¶

Basic Data Types and Operations¶

Booleans¶

Strings¶

Containers¶

List¶

Q2. Reverse a string using list slicing (5 pts).¶

Dictionaries¶

Sets¶

Functions¶

Be careful with indentation!¶

Numpy¶

Array¶

Q3. Explain why the commented line of code in the above cell causes an error (5pts).¶

Broadcasting¶

Saving and Loading Text Files¶

Random Number Generation¶

Our first plot¶

Q3. Write a function that simulates a (biased) coin flip (10 pts).¶

What did we do?¶

Q4. Simulate n coin flips t times and draw a histogram (5 pts).¶

Timeit¶