1. Operaciones matemáticas elementales

  1. Las operaciones elementales son +, -, /, *, **, y también // y % sobre los enteros.
1+1
## 2
3-3.5
## -0.5
4*7
## 28
3/5
## 0.6
2**5
## 32
25//3
## 8
25/3
## 8.333333333333334

a//b es el cociente de la división euclidiana de a por b.

23%10
## 3

a%b es el residuo de la división euclidiana de a por b.

Imprimir con el comando print.

print(3.14*12)
## 37.68
print(3)
## 3
print(3,2,"Hello!")
## 3 2 Hello!

Módulo numpy

Importar numpy:

import numpy as np

Se utiliza el prefijo np para llamar a las funciones o parámetros de la librería numpy.

np.pi
## 3.141592653589793

Para conocer las funciones de la librería numpy se usa el comando siguiente:

dir(np)
## ['ALLOW_THREADS', 'BUFSIZE', 'CLIP', 'DataSource', 'ERR_CALL', 'ERR_DEFAULT', 'ERR_IGNORE', 'ERR_LOG', 'ERR_PRINT', 'ERR_RAISE', 'ERR_WARN', 'FLOATING_POINT_SUPPORT', 'FPE_DIVIDEBYZERO', 'FPE_INVALID', 'FPE_OVERFLOW', 'FPE_UNDERFLOW', 'False_', 'Inf', 'Infinity', 'MAXDIMS', 'MAY_SHARE_BOUNDS', 'MAY_SHARE_EXACT', 'NAN', 'NINF', 'NZERO', 'NaN', 'PINF', 'PZERO', 'RAISE', 'RankWarning', 'SHIFT_DIVIDEBYZERO', 'SHIFT_INVALID', 'SHIFT_OVERFLOW', 'SHIFT_UNDERFLOW', 'ScalarType', 'True_', 'UFUNC_BUFSIZE_DEFAULT', 'UFUNC_PYVALS_NAME', 'WRAP', '_CopyMode', '_NoValue', '_UFUNC_API', '__NUMPY_SETUP__', '__all__', '__builtins__', '__cached__', '__config__', '__deprecated_attrs__', '__dir__', '__doc__', '__expired_functions__', '__file__', '__former_attrs__', '__future_scalars__', '__getattr__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_add_newdoc_ufunc', '_builtins', '_distributor_init', '_financial_names', '_get_promotion_state', '_globals', '_int_extended_msg', '_mat', '_no_nep50_warning', '_pyinstaller_hooks_dir', '_pytesttester', '_set_promotion_state', '_specific_msg', '_typing', '_using_numpy2_behavior', '_utils', 'abs', 'absolute', 'add', 'add_docstring', 'add_newdoc', 'add_newdoc_ufunc', 'all', 'allclose', 'alltrue', 'amax', 'amin', 'angle', 'any', 'append', 'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin', 'argpartition', 'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal', 'array_equiv', 'array_repr', 'array_split', 'array_str', 'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray', 'asfarray', 'asfortranarray', 'asmatrix', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett', 'base_repr', 'binary_repr', 'bincount', 'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman', 'block', 'bmat', 'bool_', 'broadcast', 'broadcast_arrays', 'broadcast_shapes', 'broadcast_to', 
'busday_count', 'busday_offset', 'busdaycalendar', 'byte', 'byte_bounds', 'bytes_', 'c_', 'can_cast', 'cast', 'cbrt', 'cdouble', 'ceil', 'cfloat', 'char', 'character', 'chararray', 'choose', 'clip', 'clongdouble', 'clongfloat', 'column_stack', 'common_type', 'compare_chararrays', 'compat', 'complex128', 'complex64', 'complex_', 'complexfloating', 'compress', 'concatenate', 'conj', 'conjugate', 'convolve', 'copy', 'copysign', 'copyto', 'corrcoef', 'correlate', 'cos', 'cosh', 'count_nonzero', 'cov', 'cross', 'csingle', 'ctypeslib', 'cumprod', 'cumproduct', 'cumsum', 'datetime64', 'datetime_as_string', 'datetime_data', 'deg2rad', 'degrees', 'delete', 'deprecate', 'deprecate_with_doc', 'diag', 'diag_indices', 'diag_indices_from', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide', 'divmod', 'dot', 'double', 'dsplit', 'dstack', 'dtype', 'dtypes', 'e', 'ediff1d', 'einsum', 'einsum_path', 'emath', 'empty', 'empty_like', 'equal', 'errstate', 'euler_gamma', 'exceptions', 'exp', 'exp2', 'expand_dims', 'expm1', 'extract', 'eye', 'fabs', 'fastCopyAndTranspose', 'fft', 'fill_diagonal', 'find_common_type', 'finfo', 'fix', 'flatiter', 'flatnonzero', 'flexible', 'flip', 'fliplr', 'flipud', 'float16', 'float32', 'float64', 'float_', 'float_power', 'floating', 'floor', 'floor_divide', 'fmax', 'fmin', 'fmod', 'format_float_positional', 'format_float_scientific', 'format_parser', 'frexp', 'from_dlpack', 'frombuffer', 'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromregex', 'fromstring', 'full', 'full_like', 'gcd', 'generic', 'genfromtxt', 'geomspace', 'get_array_wrap', 'get_include', 'get_printoptions', 'getbufsize', 'geterr', 'geterrcall', 'geterrobj', 'gradient', 'greater', 'greater_equal', 'half', 'hamming', 'hanning', 'heaviside', 'histogram', 'histogram2d', 'histogram_bin_edges', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0', 'identity', 'iinfo', 'imag', 'in1d', 'index_exp', 'indices', 'inexact', 'inf', 'info', 'infty', 'inner', 'insert', 'int16', 'int32', 
'int64', 'int8', 'int_', 'intc', 'integer', 'interp', 'intersect1d', 'intp', 'invert', 'is_busday', 'isclose', 'iscomplex', 'iscomplexobj', 'isfinite', 'isfortran', 'isin', 'isinf', 'isnan', 'isnat', 'isneginf', 'isposinf', 'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_', 'issubdtype', 'issubsctype', 'iterable', 'ix_', 'kaiser', 'kron', 'lcm', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort', 'lib', 'linalg', 'linspace', 'little_endian', 'load', 'loadtxt', 'log', 'log10', 'log1p', 'log2', 'logaddexp', 'logaddexp2', 'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace', 'longcomplex', 'longdouble', 'longfloat', 'longlong', 'lookfor', 'ma', 'mask_indices', 'mat', 'matmul', 'matrix', 'max', 'maximum', 'maximum_sctype', 'may_share_memory', 'mean', 'median', 'memmap', 'meshgrid', 'mgrid', 'min', 'min_scalar_type', 'minimum', 'mintypecode', 'mod', 'modf', 'moveaxis', 'msort', 'multiply', 'nan', 'nan_to_num', 'nanargmax', 'nanargmin', 'nancumprod', 'nancumsum', 'nanmax', 'nanmean', 'nanmedian', 'nanmin', 'nanpercentile', 'nanprod', 'nanquantile', 'nanstd', 'nansum', 'nanvar', 'nbytes', 'ndarray', 'ndenumerate', 'ndim', 'ndindex', 'nditer', 'negative', 'nested_iters', 'newaxis', 'nextafter', 'nonzero', 'not_equal', 'numarray', 'number', 'obj2sctype', 'object_', 'ogrid', 'oldnumeric', 'ones', 'ones_like', 'outer', 'packbits', 'pad', 'partition', 'percentile', 'pi', 'piecewise', 'place', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv', 'polyfit', 'polyint', 'polymul', 'polynomial', 'polysub', 'polyval', 'positive', 'power', 'printoptions', 'prod', 'product', 'promote_types', 'ptp', 'put', 'put_along_axis', 'putmask', 'quantile', 'r_', 'rad2deg', 'radians', 'random', 'ravel', 'ravel_multi_index', 'real', 'real_if_close', 'rec', 'recarray', 'recfromcsv', 'recfromtxt', 'reciprocal', 'record', 'remainder', 'repeat', 'require', 'reshape', 'resize', 'result_type', 'right_shift', 'rint', 'roll', 'rollaxis', 'roots', 'rot90', 'round', 'round_', 
'row_stack', 's_', 'safe_eval', 'save', 'savetxt', 'savez', 'savez_compressed', 'sctype2char', 'sctypeDict', 'sctypes', 'searchsorted', 'select', 'set_numeric_ops', 'set_printoptions', 'set_string_function', 'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj', 'setxor1d', 'shape', 'shares_memory', 'short', 'show_config', 'show_runtime', 'sign', 'signbit', 'signedinteger', 'sin', 'sinc', 'single', 'singlecomplex', 'sinh', 'size', 'sometrue', 'sort', 'sort_complex', 'source', 'spacing', 'split', 'sqrt', 'square', 'squeeze', 'stack', 'std', 'str_', 'string_', 'subtract', 'sum', 'swapaxes', 'take', 'take_along_axis', 'tan', 'tanh', 'tensordot', 'test', 'testing', 'tile', 'timedelta64', 'trace', 'tracemalloc_domain', 'transpose', 'trapz', 'tri', 'tril', 'tril_indices', 'tril_indices_from', 'trim_zeros', 'triu', 'triu_indices', 'triu_indices_from', 'true_divide', 'trunc', 'typecodes', 'typename', 'ubyte', 'ufunc', 'uint', 'uint16', 'uint32', 'uint64', 'uint8', 'uintc', 'uintp', 'ulonglong', 'unicode_', 'union1d', 'unique', 'unpackbits', 'unravel_index', 'unsignedinteger', 'unwrap', 'ushort', 'vander', 'var', 'vdot', 'vectorize', 'version', 'void', 'vsplit', 'vstack', 'where', 'who', 'zeros', 'zeros_like']

Llamamos a una función fct con el comando np.fct. Por ejemplo:

np.arccos(0.313)
## 1.2524462159064285

Para acceder a la ayuda de la función abs se utiliza np.info(np.abs) o help(np.abs). http://www.numpy.org/

np.info(np.abs)
## absolute(x, /, out=None, *, where=True, casting='same_kind', order='K', dtype=None, subok=True[, signature, extobj])
## 
## Calculate the absolute value element-wise.
## 
## ``np.abs`` is a shorthand for this function.
## 
## Parameters
## ----------
## x : array_like
##     Input array.
## out : ndarray, None, or tuple of ndarray and None, optional
##     A location into which the result is stored. If provided, it must have
##     a shape that the inputs broadcast to. If not provided or None,
##     a freshly-allocated array is returned. A tuple (possible only as a
##     keyword argument) must have length equal to the number of outputs.
## where : array_like, optional
##     This condition is broadcast over the input. At locations where the
##     condition is True, the `out` array will be set to the ufunc result.
##     Elsewhere, the `out` array will retain its original value.
##     Note that if an uninitialized `out` array is created via the default
##     ``out=None``, locations within it where the condition is False will
##     remain uninitialized.
## **kwargs
##     For other keyword-only arguments, see the
##     :ref:`ufunc docs <ufuncs.kwargs>`.
## 
## Returns
## -------
## absolute : ndarray
##     An ndarray containing the absolute value of
##     each element in `x`.  For complex input, ``a + ib``, the
##     absolute value is :math:`\sqrt{ a^2 + b^2 }`.
##     This is a scalar if `x` is a scalar.
## 
## Examples
## --------
## >>> x = np.array([-1.2, 1.2])
## >>> np.absolute(x)
## array([ 1.2,  1.2])
## >>> np.absolute(1.2 + 1j)
## 1.5620499351813308
## 
## Plot the function over ``[-10, 10]``:
## 
## >>> import matplotlib.pyplot as plt
## 
## >>> x = np.linspace(start=-10, stop=10, num=101)
## >>> plt.plot(x, np.absolute(x))
## >>> plt.show()
## 
## Plot the function over the complex plane:
## 
## >>> xx = x + 1j * x[:, np.newaxis]
## >>> plt.imshow(np.abs(xx), extent=[-10, 10, -10, 10], cmap='gray')
## >>> plt.show()
## 
## The `abs` function can be used as a shorthand for ``np.absolute`` on
## ndarrays.
## 
## >>> x = np.array([-1.2, 1.2])
## >>> abs(x)
## array([1.2, 1.2])

Redondear

  1. Se utilizan np.round y np.around; por ejemplo, redondeamos pi a cuatro decimales (3.1416).
np.round(np.pi,4)
## 3.1416
np.around(np.pi,4)
## 3.1416
np.around(np.pi,4)
## 3.1416

2. Variables y tipos

Declaración = asignación

a = 1.234
a
## 1.234
b = 5
c = False
type(a)
## <class 'float'>
type(b)
## <class 'int'>
type(c)
## <class 'bool'>

Cambiar el tipo de la variable int, float o bool. Por ejemplo :

int(c)
## 0
float(b)
## 5.0
int(a)
## 1
float("160")
## 160.0

Operaciones sobre las variables

a*b - 3
## 3.17
(c+1)*a
## 1.234
b > 0
## True
b > 0 and a <= 1
## False
b > 0 or a <= 2
## True
c != True
## True

Copia superficial y profunda

a = [1,2]
b = a
b[0] = 2
(a, b)
## ([2, 2], [2, 2])

Copia superficial

import copy
a = [1,2]
b = copy.copy(a)
b[0] = 2
(a, b)
## ([1, 2], [2, 2])

Copia profunda

a = [[1, 2], [3]]
b = copy.copy(a)
b[0][0] = 0
(a, b)
## ([[0, 2], [3]], [[0, 2], [3]])
a = [[1, 2], [3]]
b = copy.deepcopy(a)
b[0][0] = 0
(a, b)
## ([[1, 2], [3]], [[0, 2], [3]])

La operación de permutación entre a y b se realiza con el comando

a,b = b,a
a
## [[0, 2], [3]]
b
## [[1, 2], [3]]

4) Realizar el intercambio de variables utilizando una variable auxiliar.

f = 5
g = 6
# Save the current value of f before it is overwritten, so the swap works
# for any value of f instead of relying on the literal 5.
temp = f
f = g
g = temp
print(f,g)
## 6 5

Cadena de caracteres

chaine = "Hola mundo !"

print(chaine)
## Hola mundo !
chaine
## 'Hola mundo !'
age = 3
message = "Yo tengo " + str(age) + " anios."
print(message)
## Yo tengo 3 anios.
len(chaine)
## 12
chaine+" "+message
## 'Hola mundo ! Yo tengo 3 anios.'
chaine[0]
## 'H'
chaine[4]
## ' '
chaine[3:7]
## 'a mu'
chaine[3:-2]
## 'a mundo'
  1. Utilizaremos la concatenación (+) para “reemplazar” la H inicial por un h minúscula.
chaine = "h"+chaine[1:]
chaine
## 'hola mundo !'
chaine[-3::]
## 'o !'
#chaine[5::]
chaine[4]
## ' '
chaine[::2]
## 'hl ud '
chaine[::-1]
## '! odnum aloh'
chaine[::-2]
## '!onmao'
"fichier"+str(2)
## 'fichier2'

3. Arreglos

Listas

Las listas son secuencias: objetos capaces de contener objetos de diferentes tipos.

l = [1,False, "Hello!"]
l
## [1, False, 'Hello!']
ma_liste = [1,1,2,3,5,8,13,21]
del ma_liste[0]
ma_liste
## [1, 2, 3, 5, 8, 13, 21]
del ma_liste[4]
ma_liste
## [1, 2, 3, 5, 13, 21]

Agregar un elemento al final de la lista con append.

ma_liste.append(34)
ma_liste
## [1, 2, 3, 5, 13, 21, 34]
l = list(range(10))
l+[1]
## [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1]
max(l)
## 9
np.argmax(l)
## 9
np.argmax(ma_liste)
## 6
l.pop(4)
## 4
l
## [0, 1, 2, 3, 5, 6, 7, 8, 9]
l.insert(1,55)
l
## [0, 55, 1, 2, 3, 5, 6, 7, 8, 9]
l.insert(5,55)
l
## [0, 55, 1, 2, 3, 55, 5, 6, 7, 8, 9]
55 in l
## True
l + l
## [0, 55, 1, 2, 3, 55, 5, 6, 7, 8, 9, 0, 55, 1, 2, 3, 55, 5, 6, 7, 8, 9]
ll = l + l 
ll[0::2]
## [0, 1, 3, 5, 7, 9, 55, 2, 55, 6, 8]

Tuplas

Una tupla es una secuencia de valores agrupados. Una tupla sirve para agrupar, como si fueran un único valor, varios valores que, por su naturaleza, deben ir juntos. El tipo de datos que representa a las tuplas se llama tuple. El tipo tuple es inmutable: una tupla no puede ser modificada una vez que ha sido creada.

t = [1,1,2,False]
t
## [1, 1, 2, False]
t.append(2)
t
## [1, 1, 2, False, 2]
t[1] = 12

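Ciertas funciones sirven para las listas pero no para las tuplas. El ejemplo anterior usa una lista (corchetes), por eso append y la asignación t[1] = 12 funcionan; con una tupla, t = (1, 1, 2, False), t.append(2) produciría un AttributeError y t[1] = 12 un TypeError.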

2 in t
## True
t + t
## [1, 12, 2, False, 2, 1, 12, 2, False, 2]
t[0::2]
## [1, 2, 2]
range(5)
## range(0, 5)
len(range(5))
## 5
5 in range(5)
## False
4 in range(5)
## True

Diccionarios

monty_python = {'titre': 'Holy Grail', 'date': 1975}
monty_python
## {'titre': 'Holy Grail', 'date': 1975}
monty_python["date"]
## 1975
monty_python.keys()
## dict_keys(['titre', 'date'])
monty_python.values()
## dict_values(['Holy Grail', 1975])
monty_python.items()
## dict_items([('titre', 'Holy Grail'), ('date', 1975)])

4. Estructuras de control

z = 2
if z > 1: 
    print("z es mas grande que 1")
## z es mas grande que 1
if z < 1:
    print("z es mas pequeno que 1")
  1. utilizando if, elif y else construimos un código que permita decir si una variable a es estrictamente positiva, estrictamente negativa o nula
a = 0

if a > 0: # Positivo
    print("a es positivo.")
elif a < 0: # Negativo
    print("a es negativo.")
else: # Nulo
    print("a es nulo.")
## a es nulo.

while

nb = 7 # Number whose multiplication table we want to print
i = 0 # Loop counter, incremented on every pass through the loop

while i < 10: # Keep looping as long as i is strictly less than 10
    print(i + 1, "*", nb, "=", (i + 1) * nb)
    i += 1 # Advance the counter by 1 on each iteration
## 1 * 7 = 7
## 2 * 7 = 14
## 3 * 7 = 21
## 4 * 7 = 28
## 5 * 7 = 35
## 6 * 7 = 42
## 7 * 7 = 49
## 8 * 7 = 56
## 9 * 7 = 63
## 10 * 7 = 70

Otro ejemplo con una lista

ma_liste = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
i = 0 
while i < len(ma_liste):
    print(ma_liste[i])
    i += 1 
## a
## b
## c
## d
## e
## f
## g
## h
  1. Calcula \(\displaystyle\sum_{i=1}^N \frac{1}{i}\) para \(N = 100\) y \(N = 1000\).
N = 1000
s = 0 
i = 1 
while i <= N: 
    s = s + (1.0/i)
    i += 1
    
s
## 7.485470860550343

for

chaine = "Hola mundo"
for lettre in chaine:
    print(lettre)
## H
## o
## l
## a
##  
## m
## u
## n
## d
## o

De manera simple con enteros

for i in range(5):
    print(i)
## 0
## 1
## 2
## 3
## 4

Las excepciones

# In interactive use the value would be read with: annee = input()
annee = 2010.2
try:
    # Convert the year to an integer (int() truncates a float toward zero).
    annee = int(annee)
except (TypeError, ValueError, OverflowError):
    # Only the errors int() raises on bad input are handled here; a bare
    # "except:" would also swallow KeyboardInterrupt and SystemExit.
    print("Error")

5. Funciones

def table(nb, max=10):
    """Print the multiplication table of ``nb``.

    One line is printed per factor, from ``1 * nb`` up to ``max * nb``.
    Nothing is printed when ``max`` is 0 or negative.

    Args:
        nb: The number whose table is printed.
        max: The last factor to print (defaults to 10).
    """
    factor = 1
    # factor runs one ahead of a zero-based counter, hence "factor - 1".
    while factor - 1 < max:
        print(factor, "*", nb, "=", factor * nb)
        factor += 1

table(4)
## 1 * 4 = 4
## 2 * 4 = 8
## 3 * 4 = 12
## 4 * 4 = 16
## 5 * 4 = 20
## 6 * 4 = 24
## 7 * 4 = 28
## 8 * 4 = 32
## 9 * 4 = 36
## 10 * 4 = 40
def cuadrado(valeur):
    """Return ``valeur`` multiplied by itself (its square)."""
    squared = valeur * valeur
    return squared

cuadrado(12)
## 144

6. Introducción a Numpy

a = np.arange(15).reshape(3, 5)
a
## array([[ 0,  1,  2,  3,  4],
##        [ 5,  6,  7,  8,  9],
##        [10, 11, 12, 13, 14]])
a.shape
## (3, 5)
a.ndim
## 2
a.size
## 15
a[1,1]
## 6
a[1,1] = 0
a
## array([[ 0,  1,  2,  3,  4],
##        [ 5,  0,  7,  8,  9],
##        [10, 11, 12, 13, 14]])
b = np.array([(1.5,2,3), (4,5,6)])
b
## array([[1.5, 2. , 3. ],
##        [4. , 5. , 6. ]])
np.sqrt(b)
## array([[1.22474487, 1.41421356, 1.73205081],
##        [2.        , 2.23606798, 2.44948974]])
np.sin(b)
## array([[ 0.99749499,  0.90929743,  0.14112001],
##        [-0.7568025 , -0.95892427, -0.2794155 ]])
b-3
## array([[-1.5, -1. ,  0. ],
##        [ 1. ,  2. ,  3. ]])

Observación

np.linspace(0, 10, 12)
## array([ 0.        ,  0.90909091,  1.81818182,  2.72727273,  3.63636364,
##         4.54545455,  5.45454545,  6.36363636,  7.27272727,  8.18181818,
##         9.09090909, 10.        ])
np.zeros((4,4))
## array([[0., 0., 0., 0.],
##        [0., 0., 0., 0.],
##        [0., 0., 0., 0.],
##        [0., 0., 0., 0.]])
np.ones((4,4))
## array([[1., 1., 1., 1.],
##        [1., 1., 1., 1.],
##        [1., 1., 1., 1.],
##        [1., 1., 1., 1.]])
# np.asmatrix is used instead of np.mat, which was removed in NumPy 2.0.
# It accepts the same MATLAB-style string: spaces separate columns and
# ';' separates rows.
M=np.asmatrix('[1 2 4 ; 3 4 5.]')
N=np.asmatrix('[2. ; 4 ; 6]')
print(M)
## [[1. 2. 4.]
##  [3. 4. 5.]]
print(N)
## [[2.]
##  [4.]
##  [6.]]
# For np.matrix objects, * is the matrix product: (2x3) * (3x1) -> (2x1).
M*N
## matrix([[34.],
##         [52.]])
import time

# Compare a vectorised dot product with an explicit Python loop.
# X is a 1 x n row matrix and Y its n x 1 transpose.  The dtype is pinned to
# int64: the exact sum of squares is 333332833333500000, which does not fit
# in a 32-bit integer and silently wraps around if the default integer type
# is 32 bits wide.  np.asmatrix replaces np.mat (removed in NumPy 2.0).
X = np.asmatrix(np.arange(1000000, dtype=np.int64))
Y = X.T
# perf_counter is the clock intended for measuring short intervals.
debut = time.perf_counter()
W = X*Y
fin = time.perf_counter()
print(W)
## [[333332833333500000]]
res1 = fin-debut
# The timings shown below are sample values; they depend on the machine.
res1
## 0.013510942459106445
debut = time.perf_counter()
res = 0
for i in range(1000000):
    res = res + i*i
fin = time.perf_counter()
res2 = fin-debut
# Ratio loop time / vectorised time: how many times slower the loop is.
print(res2/res1)
## 12.029769362438017
print(res2)
## 0.16253352165222168
print(res1)
## 0.013510942459106445
  1. Crear la matriz cuadrada 10x10
M=np.arange(100).reshape(10,10)
M = M+1
print(M)
## [[  1   2   3   4   5   6   7   8   9  10]
##  [ 11  12  13  14  15  16  17  18  19  20]
##  [ 21  22  23  24  25  26  27  28  29  30]
##  [ 31  32  33  34  35  36  37  38  39  40]
##  [ 41  42  43  44  45  46  47  48  49  50]
##  [ 51  52  53  54  55  56  57  58  59  60]
##  [ 61  62  63  64  65  66  67  68  69  70]
##  [ 71  72  73  74  75  76  77  78  79  80]
##  [ 81  82  83  84  85  86  87  88  89  90]
##  [ 91  92  93  94  95  96  97  98  99 100]]
M.shape
## (10, 10)

Probar las operaciones siguientes

M.sum()
## 5050
M.cumsum()
## array([   1,    3,    6,   10,   15,   21,   28,   36,   45,   55,   66,
##          78,   91,  105,  120,  136,  153,  171,  190,  210,  231,  253,
##         276,  300,  325,  351,  378,  406,  435,  465,  496,  528,  561,
##         595,  630,  666,  703,  741,  780,  820,  861,  903,  946,  990,
##        1035, 1081, 1128, 1176, 1225, 1275, 1326, 1378, 1431, 1485, 1540,
##        1596, 1653, 1711, 1770, 1830, 1891, 1953, 2016, 2080, 2145, 2211,
##        2278, 2346, 2415, 2485, 2556, 2628, 2701, 2775, 2850, 2926, 3003,
##        3081, 3160, 3240, 3321, 3403, 3486, 3570, 3655, 3741, 3828, 3916,
##        4005, 4095, 4186, 4278, 4371, 4465, 4560, 4656, 4753, 4851, 4950,
##        5050])
M.mean()
## 50.5
M[2::]
## array([[ 21,  22,  23,  24,  25,  26,  27,  28,  29,  30],
##        [ 31,  32,  33,  34,  35,  36,  37,  38,  39,  40],
##        [ 41,  42,  43,  44,  45,  46,  47,  48,  49,  50],
##        [ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60],
##        [ 61,  62,  63,  64,  65,  66,  67,  68,  69,  70],
##        [ 71,  72,  73,  74,  75,  76,  77,  78,  79,  80],
##        [ 81,  82,  83,  84,  85,  86,  87,  88,  89,  90],
##        [ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100]])

Introducción a Pandas

Importar archivos

import pandas as pd
pulsars = pd.read_csv('HTRU_2.csv', header =None)
pulsars.columns = ['M_IP', 'SD_IP', 'EK_IP', 'SW_IP', 'M_DS', 'SD_DS', 'EK_DS', 'SW_DS', 'Class']
pulsars.head()
##          M_IP      SD_IP     EK_IP  ...      EK_DS       SW_DS  Class
## 0  140.562500  55.683782 -0.234571  ...   7.975532   74.242225      0
## 1  102.507812  58.882430  0.465318  ...  10.576487  127.393580      0
## 2  103.015625  39.341649  0.323328  ...   7.735822   63.171909      0
## 3  136.750000  57.178449 -0.068415  ...   6.896499   53.593661      0
## 4   88.726562  40.672225  0.600866  ...  14.269573  252.567306      0
## 
## [5 rows x 9 columns]
pulsars.describe()
##                M_IP         SD_IP  ...         SW_DS         Class
## count  17898.000000  17898.000000  ...  17898.000000  17898.000000
## mean     111.079968     46.549532  ...    104.857709      0.091574
## std       25.652935      6.843189  ...    106.514540      0.288432
## min        5.812500     24.772042  ...     -1.976976      0.000000
## 25%      100.929688     42.376018  ...     34.960504      0.000000
## 50%      115.078125     46.947479  ...     83.064556      0.000000
## 75%      127.085938     51.023202  ...    139.309330      0.000000
## max      192.617188     98.778911  ...   1191.000837      1.000000
## 
## [8 rows x 9 columns]
pulsars['M_IP'].head()
## 0    140.562500
## 1    102.507812
## 2    103.015625
## 3    136.750000
## 4     88.726562
## Name: M_IP, dtype: float64
  • Seleccionar datos por números de fila (.iloc)
  • Seleccionar datos por etiqueta o por una declaración condicional (.loc)
pulsars.loc[11:15,['SD_IP','EK_IP']]
##         SD_IP     EK_IP
## 11  44.058244 -0.081060
## 12  49.554327 -0.135304
## 13  45.506577  0.325438
## 14  51.524484 -0.031852
## 15  51.945716 -0.094499
pulsars.loc[pulsars['SD_IP']>40,'SD_IP']
## 0        55.683782
## 1        58.882430
## 3        57.178449
## 4        40.672225
## 5        46.698114
##            ...    
## 17893    59.847421
## 17894    49.485605
## 17895    59.935939
## 17896    53.902400
## 17897    85.797340
## Name: SD_IP, Length: 14929, dtype: float64
pulsars.iloc [[ 0 , 3 , 6 , 24 ], [ 0 , 5 , 6 ]] 
##           M_IP      SD_DS      EK_DS
## 0   140.562500  19.110426   7.975532
## 3   136.750000  20.959280   6.896499
## 6   119.484375   9.279612  19.206230
## 24  106.648438  18.405371   9.378660
pulsars.iloc[20:41,2:4]
##        EK_IP     SW_IP
## 20  0.627487 -0.026498
## 21  0.142654  0.320420
## 22  0.326387  0.803502
## 23  0.257953 -0.405049
## 24  0.378355 -0.266372
## 25  0.279391 -0.129011
## 26  0.142597  0.018885
## 27 -0.002549 -0.460360
## 28  0.179377 -0.177285
## 29  0.230439  0.193325
## 30  0.094860  0.683113
## 31  0.475729  0.781486
## 32 -0.049280 -0.208257
## 33 -0.470773 -0.125946
## 34  0.177360  0.024918
## 35  0.315729 -0.202183
## 36 -0.131080 -0.288851
## 37  0.321157  1.821631
## 38  0.496005  1.481816
## 39 -0.109243  0.137684
## 40 -0.039592 -0.176243

Ordenar de forma creciente

pulsars.sort_values(by='EK_IP')
##              M_IP      SD_IP     EK_IP  ...     EK_DS     SW_DS  Class
## 15687  192.617188  56.926093 -1.876011  ...  2.764809  6.341108      0
## 12186  185.257812  63.003875 -1.738021  ... -1.013737 -0.047572      0
## 14049  182.835938  45.583847 -1.730782  ... -0.671097 -0.849901      0
## 14215  183.453125  47.683504 -1.707789  ... -0.117230 -1.546660      0
## 14816  176.726562  46.172184 -1.679039  ... -0.375842 -1.614449      0
## ...           ...        ...       ...  ...       ...       ...    ...
## 3981     8.226562  30.904898  7.627580  ... -0.331612 -0.497152      1
## 3802     8.156250  27.129446  7.856370  ... -0.177335 -0.673351      1
## 11589   10.320312  26.871201  7.875742  ...  1.635320  2.225703      1
## 11310    8.250000  26.943517  7.879628  ...  1.514147  1.877476      1
## 1834    11.140625  25.695250  8.069522  ...  0.781356 -0.152781      1
## 
## [17898 rows x 9 columns]
pulsars.sort_values(['M_IP','EK_IP'],ascending=[True,False])
##              M_IP      SD_IP     EK_IP  ...     EK_DS      SW_DS  Class
## 5043     5.812500  30.631313  7.152691  ...  0.237403   0.127258      1
## 4561     6.179688  28.370989  7.520589  ... -0.730653  -0.345935      1
## 9254     6.187500  29.694999  7.320157  ... -0.840554  -0.485269      1
## 5713     6.187500  30.853928  6.476885  ...  0.393867   0.097969      1
## 8052     6.265625  31.535319  6.141778  ...  0.260257  -0.917726      1
## ...           ...        ...       ...  ...       ...        ...    ...
## 12186  185.257812  63.003875 -1.738021  ... -1.013737  -0.047572      0
## 10369  186.023438  52.898704 -1.464810  ...  4.039774  17.416952      0
## 6564   189.734375  59.578268 -1.641515  ... -0.898056  -0.649195      0
## 4548   190.421875  59.106447 -1.517159  ...  0.058366  -1.964998      0
## 15687  192.617188  56.926093 -1.876011  ...  2.764809   6.341108      0
## 
## [17898 rows x 9 columns]
pulsars['SD_IP'][(pulsars['SD_IP']>40) & (pulsars['SD_IP']<100)]
## 0        55.683782
## 1        58.882430
## 3        57.178449
## 4        40.672225
## 5        46.698114
##            ...    
## 17893    59.847421
## 17894    49.485605
## 17895    59.935939
## 17896    53.902400
## 17897    85.797340
## Name: SD_IP, Length: 14929, dtype: float64

Modificaciones en el DataFrame

pulsars.iloc[1,1] = -9999
pulsars.head()
##          M_IP        SD_IP     EK_IP  ...      EK_DS       SW_DS  Class
## 0  140.562500    55.683782 -0.234571  ...   7.975532   74.242225      0
## 1  102.507812 -9999.000000  0.465318  ...  10.576487  127.393580      0
## 2  103.015625    39.341649  0.323328  ...   7.735822   63.171909      0
## 3  136.750000    57.178449 -0.068415  ...   6.896499   53.593661      0
## 4   88.726562    40.672225  0.600866  ...  14.269573  252.567306      0
## 
## [5 rows x 9 columns]

Transformar las posiciones (3,1) a (13,1) reemplazando con valores 10, 9,…,1,0

np.linspace(0,10,11)
## array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])
pulsars.iloc[3:14,1] = (np.linspace(0,10,11))[::-1]
pulsars.iloc[1:16,]
##           M_IP        SD_IP     EK_IP  ...      EK_DS       SW_DS  Class
## 1   102.507812 -9999.000000  0.465318  ...  10.576487  127.393580      0
## 2   103.015625    39.341649  0.323328  ...   7.735822   63.171909      0
## 3   136.750000    10.000000 -0.068415  ...   6.896499   53.593661      0
## 4    88.726562     9.000000  0.600866  ...  14.269573  252.567306      0
## 5    93.570312     8.000000  0.531905  ...  10.621748  131.394004      0
## 6   119.484375     7.000000  0.031460  ...  19.206230  479.756567      0
## 7   130.382812     6.000000 -0.158323  ...  13.539456  198.236457      0
## 8   107.250000     5.000000  0.452688  ...   9.001004  107.972506      0
## 9   107.257812     4.000000  0.465882  ...   7.397080   57.784738      0
## 10  142.078125     3.000000 -0.320328  ...   6.076266   37.831393      0
## 11  133.257812     2.000000 -0.081060  ...  11.972067  195.543448      0
## 12  134.960938     1.000000 -0.135304  ...   3.893934   14.131206      0
## 13  117.945312     0.000000  0.325438  ...   8.943212   82.475592      0
## 14  138.179688    51.524484 -0.031852  ...   5.155940   26.143310      0
## 15  114.367188    51.945716 -0.094499  ...   9.050612   96.611903      0
## 
## [15 rows x 9 columns]

Aplicar Funciones

pulsars.apply(np.cumsum)
##                M_IP          SD_IP  ...         SW_DS  Class
## 0      1.405625e+02      55.683782  ...  7.424222e+01      0
## 1      2.430703e+02   -9943.316218  ...  2.016358e+02      0
## 2      3.460859e+02   -9903.974568  ...  2.648077e+02      0
## 3      4.828359e+02   -9893.974568  ...  3.184014e+02      0
## 4      5.715625e+02   -9884.974568  ...  5.709687e+02      0
## ...             ...            ...  ...           ...    ...
## 17893  1.987696e+06  822381.823484  ...  1.876595e+06   1639
## 17894  1.987818e+06  822431.309089  ...  1.876603e+06   1639
## 17895  1.987938e+06  822491.245029  ...  1.876608e+06   1639
## 17896  1.988052e+06  822545.147429  ...  1.876742e+06   1639
## 17897  1.988109e+06  822630.944769  ...  1.876743e+06   1639
## 
## [17898 rows x 9 columns]
(pulsars.iloc[:,0:4]).apply(lambda x: x.max() - x.min())
## M_IP       186.804688
## SD_IP    10097.778911
## EK_IP        9.945533
## SW_IP       69.893508
## dtype: float64
  1. Calcular la varianza usando la función np.var y apply para cada variable numérica
(pulsars.iloc[:,0:4]).apply(np.var)
## M_IP      658.036325
## SD_IP    5685.754808
## EK_IP       1.132117
## SW_IP      38.041028
## dtype: float64

Notamos que :

pulsars.apply(type)
## M_IP     <class 'pandas.core.series.Series'>
## SD_IP    <class 'pandas.core.series.Series'>
## EK_IP    <class 'pandas.core.series.Series'>
## SW_IP    <class 'pandas.core.series.Series'>
## M_DS     <class 'pandas.core.series.Series'>
## SD_DS    <class 'pandas.core.series.Series'>
## EK_DS    <class 'pandas.core.series.Series'>
## SW_DS    <class 'pandas.core.series.Series'>
## Class    <class 'pandas.core.series.Series'>
## dtype: object
type(pulsars)
## <class 'pandas.core.frame.DataFrame'>

Group_by

pulsars.groupby(['Class'])
## <pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001EF1903A3F0>
pulsars
##              M_IP        SD_IP     EK_IP  ...      EK_DS       SW_DS  Class
## 0      140.562500    55.683782 -0.234571  ...   7.975532   74.242225      0
## 1      102.507812 -9999.000000  0.465318  ...  10.576487  127.393580      0
## 2      103.015625    39.341649  0.323328  ...   7.735822   63.171909      0
## 3      136.750000    10.000000 -0.068415  ...   6.896499   53.593661      0
## 4       88.726562     9.000000  0.600866  ...  14.269573  252.567306      0
## ...           ...          ...       ...  ...        ...         ...    ...
## 17893  136.429688    59.847421 -0.187846  ...  15.450260  285.931022      0
## 17894  122.554688    49.485605  0.127978  ...   2.945244    8.297092      0
## 17895  119.335938    59.935939  0.159363  ...   2.499517    4.595173      0
## 17896  114.507812    53.902400  0.201161  ...  10.007967  134.238910      0
## 17897   57.062500    85.797340  1.406391  ...  -1.597527    1.429475      0
## 
## [17898 rows x 9 columns]