## File Formats, Importing & Exporting Data

In [12]:
# what is a file?
# a file is a sequence of bytes (each byte is eight 1s and 0s) stored on a readable/writable device (e.g. a hard disk)
# what is a disk? (a "permanent" storage device)

In [27]:
# Build a short byte string and inspect it before writing it to disk.
mydata = b"hello"
print(type(mydata))  # a bytes object holds raw 8-bit values, not text
print(len(mydata))   # number of bytes
print(mydata)

# "wb" = write + binary: the bytes are stored exactly as given.
# Write the variable inspected above (not a duplicated literal), and let the
# with-block close the file even if write() raises.
with open("myfile", "wb") as fid:
    fid.write(mydata)

<class 'bytes'>
5
b'hello'


In [28]:
# Read the whole file back as raw bytes ("rb" = read + binary).
# The with-block closes the file even if read() raises.
with open("myfile", "rb") as fid:
    data = fid.read()
print(type(data))
print(data)

<class 'bytes'>
b'hello'


In [30]:
# Use the Unix/macOS utility 'hexdump' to view the file byte by byte
# in hexadecimal format. The leading '!' runs the line as a shell command.
# -C selects the canonical display: byte offset, the bytes in hex, and the
# same bytes as printable characters between '|' marks.
# Remember: 1 byte = 8 bits, so each byte is shown as two hex digits.

!hexdump -C myfile 

00000000  68 65 6c 6c 6f                                    |hello|
00000005


In [42]:
# Show the first byte of the file in several notations.
# Indexing a bytes object yields an int, so the integer format specs apply.
print(f"binary  : {data[0]:08b}") # 1st byte in binary, zero-padded to 8 bits
print(f"hex     : {data[0]:02x}") # 1st byte in hex, zero-padded to 2 digits (as hexdump shows it)
print(f"decimal : {data[0]:d}")   # 1st byte in decimal format
print(f"char    : {data[0]:c}")   # 1st byte as the character with that code (ascii here)

binary  : 01101000
hex     : 68
decimal : 104
char    : h


In [55]:
# reading/writing plaintext data using NumPy
import numpy as np

# e.g. a csv file (columns of comma separated values)
# skip_header is the NUMBER of leading lines to skip, so pass the integer 1
# for the single header row instead of the boolean True.
# NOTE: this rebinds `data`, which held the raw file bytes in earlier cells.
data = np.genfromtxt("bball.csv", delimiter=",", skip_header=1)
print(data[:10])  # first 10 rows, all columns

[[ 80.     0.57  33.88 185.    45.    24.    93.5 ]
 [ 69.     0.41  36.22 178.    43.9   30.    85.6 ]
 [ 81.     0.44  26.7  196.    46.2   34.    84.8 ]
 [ 55.     0.46  36.55 185.    43.5   23.    77.6 ]
 [ 70.     0.27  24.27 188.    41.5   26.    67.3 ]
 [ 81.     0.4   30.77 188.    49.9   26.    86.1 ]
 [ 10.     0.24   9.7  210.    36.    33.    83.3 ]
 [ 70.     0.44  17.76 180.    45.1   26.    83.1 ]
 [ 46.     0.37  12.   196.    36.9   24.    67.3 ]
 [ 41.     0.45  12.12 188.    41.5   31.    77.8 ]]


In [57]:
# Write a 10x3 array of random numbers as plain text.
# savetxt separates columns with a single space by default; pass delimiter=","
# so that the ".csv" file really contains comma-separated values.
np.savetxt("newfile.csv", np.random.rand(10, 3), delimiter=",")

In [59]:
# using Pandas to load a csv as a data frame
# advantage: Pandas data frames (like those in R) have named columns

import pandas as pd

In [62]:
# Load the CSV into a DataFrame; the file's first row becomes the column names.
d = pd.read_csv(
    "bball.csv",
)
# Plain-text dump of the frame (pandas elides the middle rows of long frames).
print(d)

     GAMES   PPM    MPG    HGT   FGP   AGE   FTP
0     80.0  0.57  33.88  185.0  45.0  24.0  93.5
1     69.0  0.41  36.22  178.0  43.9  30.0  85.6
2     81.0  0.44  26.70  196.0  46.2  34.0  84.8
3     55.0  0.46  36.55  185.0  43.5  23.0  77.6
4     70.0  0.27  24.27  188.0  41.5  26.0  67.3
..     ...   ...    ...    ...   ...   ...   ...
100   81.0  0.43  25.67  198.0  45.3  30.0  84.0
101   35.0  0.33   6.91  188.0  36.5  23.0  81.8
102   76.0  0.43  35.01  188.0  44.6  27.0  90.7
103   40.0  0.39   4.35  183.0  34.8  23.0  73.1
104   63.0  0.22  13.27  193.0  41.3  31.0  87.5

[105 rows x 7 columns]
