ML Prerequisites Public

  • Email
  • Share
  • Contributors
  • Close Course

ML Prerequisites

Tags

Module Information

# Todo: http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe
#
# pandas notes
#   -> DataFrame object for data manipulation with integrated indexing
#   -> Tool for reading/writing data
#
# pandas is built around 2 classes:
#   a.) DataFrame: rows and named columns
#   b.) Series: a single column.
# A DataFrame contains one or more named Series.
# A pandas Series can be used as an argument to most NumPy functions.
# Series.apply() can be used for complex single-column transformations.
# It accepts a function (e.g. a lambda) that is applied to each value.

import numpy as np  # needed for np.log() below; the original notes never imported it
import pandas as pd

print(pd.__version__)  # 0.19.2

# Creating a Series object
city = pd.Series(['AA', 'BB', 'CC'])
# 0    AA
# 1    BB
# 2    CC

# Creating DataFrame objects. Pass a dict mapping column names to Series.
# If the Series don't match in length, the missing values are filled with NaN.
population = pd.Series([12, 13, 45])
city_pop_df = pd.DataFrame({'city_name': city, 'Population': population})
#       Population city_name
# 0          12        AA
# 1          13        BB
# 2          45        CC

# Reads the CSV column-wise and creates a DataFrame (requires network access).
california_housing_dataframe = pd.read_csv("https://storage.googleapis.com/mledu-datasets/california_housing_train.csv", sep=",")

# DataFrame.describe() shows interesting statistics about the DataFrame:
# count, mean, std, min, 25%, 50%, 75%, max.
# NOTE: as a bare expression the result is only displayed in an interactive
# session/notebook; wrap it in print() to see it when run as a script.
california_housing_dataframe.describe()

# DataFrame.head() displays the first few records of a DataFrame:
print('FIRST FEW RECORDS')
print(california_housing_dataframe.head())

# Accessing a DataFrame using dict/list style indexing
print(city_pop_df['city_name'][0])  # AA
print(city_pop_df[0:2])
#    Population city_name
# 0          12        AA
# 1          13        BB

# Manipulating data
print('Arithmetic operation on series')
print(population / 1000)
# 0    0.012
# 1    0.013
# 2    0.045

# NumPy can use a pandas Series as an argument to its functions
print('Numpy log on Population')
print(np.log(population))
# 0    2.484907
# 1    2.564949
# 2    3.806662

# Series.apply() can create complex single-column transformations,
# using a lambda as an argument
greater_pop = population.apply(lambda val: val > 12)
print(greater_pop)
# 0    False
# 1     True
# 2     True

# Adds 2 Series to the DataFrame
city_pop_df['Area_sq_miles'] = pd.Series([45, 65, 34])
city_pop_df['Population_density'] = city_pop_df['Population'] / city_pop_df['Area_sq_miles']
print(city_pop_df)
#        Population city_name  Area_sq_miles  Population_density
# 0          12        AA             45            0.266667
# 1          13        BB             65            0.200000
# 2          45        CC             34            1.323529
Show less
# TensorFlow is an open source software library for high-performance
# numerical computation. Tensors are arrays of arbitrary dimension.
# TensorFlow can be used to manipulate tensors of very high dimension.
# Some low-dimensional tensors are:
#   - A scalar is a 0-d array (a 0th-order tensor). For example, "Howdy" or 5
#   - A vector is a 1-d array (a 1st-order tensor). For example, [2, 3, 5, 7, 11] or [5]
#   - A matrix is a 2-d array (a 2nd-order tensor).
#     For example, [[3.1, 8.2, 5.9], [4.3, -2.7, 6.5]]
# A TensorFlow graph (computational graph, data flow graph) is a graph data
# structure. The graph's nodes are operations. Edges are tensors. Tensors flow
# through a graph, manipulated at each node by an operation. Tensors are
# stored in graphs as constants or Variables.
#
# NOTE: these snippets use tf.Session / tf.global_variables_initializer,
# i.e. the TensorFlow 1.x graph-and-session API.

import tensorflow as tf

# Tensors can be stored in a graph as constants or Variables.
x = tf.constant(5.2)
y = tf.Variable([5])
print(x, y)

# Assign a different value
y = y.assign([6])

# Graphs run within a TensorFlow Session, which holds the state for the graph(s)
with tf.Session() as sess:
    # When working with tf.Variables, you must explicitly initialize them by
    # running tf.global_variables_initializer() at the start of the session.
    # Creating the op is not enough: it has to be executed with sess.run(),
    # otherwise evaluating the variable fails with FailedPreconditionError.
    initialization = tf.global_variables_initializer()
    sess.run(initialization)
    print(y.eval())

# After defining these you can combine them with other operations like tf.add.
# A new tensor will be returned with the sum of these 2.

# Create a graph
g = tf.Graph()

# Establish the graph as the default graph
with g.as_default():
    x = tf.constant(8, name='x_const')
    y = tf.constant(5, name='y_const')
    z = tf.constant(4, name='z_const')
    my_sum = tf.add(x, y, name='x_y_sum')
    my_sum_z = tf.add(my_sum, z, name='x_y_z_sum')

    # Create a session to run the default graph
    with tf.Session() as sess:
        print(my_sum.eval())
        # Output: 13
        print(my_sum_z.eval())
        # Output: 17

# Working with vectors
print('VECTOR ADDITION')
with tf.Graph().as_default():
    # Create 6-element vectors
    primes = tf.constant([2, 3, 5, 7, 11, 13], dtype=tf.int32)
    ones = tf.ones([6], dtype=tf.int32)  # [1,1,1,1,1,1]

    # Add them and store the result in another vector
    my_sum = tf.add(primes, ones)

    with tf.Session() as sess:
        print('Vector Sum: ', my_sum.eval())
        # ('Vector Sum: ', array([ 3,  4,  6,  8, 12, 14], dtype=int32))

print('TENSOR SHAPES')
# Shapes categorize the size and number of dimensions in a tensor. A shape is
# a list, with the ith element representing the size along dimension i. The
# length of this list indicates the rank of the tensor (no. of dimensions).
with tf.Graph().as_default():
    # 0-D tensor
    scalar = tf.zeros([])
    # 3 elements
    vector = tf.zeros([3])
    # 2 rows, 3 columns
    matrix = tf.zeros([2, 3])

    with tf.Session() as sess:
        print('scalar shape: ', scalar.get_shape(), 'value: ', scalar.eval())
        print('vector shape: ', vector.get_shape(), 'value: ', vector.eval())
        print('matrix shape: ', matrix.get_shape(), 'value: ', matrix.eval())

print('BROADCASTING')
# A smaller array is enlarged to have the same shape as the larger array so
# that an element-wise operation can be performed.
with tf.Graph().as_default():
    primes = tf.constant([2, 3, 5, 7, 11, 13], dtype=tf.int32)
    ones = tf.constant(1, dtype=tf.int32)
    my_sum = tf.add(primes, ones)

    with tf.Session() as sess:
        print(my_sum.eval())  # [ 3  4  6  8 12 14]

print('MATRIX MULTIPLICATION')
# In linear algebra, the number of columns of the first matrix must equal the
# number of rows of the second for matrix multiplication.
with tf.Graph().as_default():
    # 3 * 4 2-d tensor
    x = tf.constant([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=tf.int32)
    # 4 * 2 2-d tensor
    y = tf.constant([[2, 2], [3, 5], [4, 5], [1, 5]], dtype=tf.int32)
    my_mul = tf.matmul(x, y)

    with tf.Session() as sess:
        print(my_mul.eval())
        # [[ 24  47]
        #  [ 64 115]
        #  [104 183]]

print('TENSOR RESHAPING')
with tf.Graph().as_default():
    # 8 * 2 matrix (2-D tensor)
    matrix = tf.constant([[1, 2], [3, 4], [5, 6], [7, 8],
                          [9, 10], [11, 12], [13, 14], [15, 16]], dtype=tf.int32)
    reshaped_2_8 = tf.reshape(matrix, [2, 8])
    # Reshape to 3-d (2 * 2 * 4)
    reshaped_2_2_4 = tf.reshape(matrix, [2, 2, 4])

    with tf.Session() as sess:
        print('reshape to 2 * 8')
        print(reshaped_2_8.eval())
        # [[ 1  2  3  4  5  6  7  8]
        #  [ 9 10 11 12 13 14 15 16]]
        print('reshape to 2 * 2 * 4')
        print(reshaped_2_2_4.eval())
        # [[[ 1  2  3  4]
        #   [ 5  6  7  8]]
        #  [[ 9 10 11 12]
        #   [13 14 15 16]]]

print('Variables')
# Values can be changed
with tf.Graph().as_default():
    v = tf.Variable([3])
    w = tf.Variable(tf.random_normal([1], mean=1.0, stddev=0.35))

    with tf.Session() as sess:
        try:
            print(v.eval())
        except tf.errors.FailedPreconditionError as e:
            print(e)
            # Error that the variable is uninitialized

        sess.run(tf.global_variables_initializer())
        print(v.eval())  # [3]
        print(w.eval())  # e.g. [0.98439926] (random draw)

        # With every new session, variable values have to be reinitialized.
        # Creating the assign op does not change the variable by itself...
        assignment = tf.assign(v, [7])
        print('reassigned. wont chnge: ', v.eval())  # still [3]: the assign op has not been run yet
        # ...the new value only takes effect once the op is executed.
        sess.run(assignment)
        print('reinitialized. value changes: ', v.eval())
Show less