scientimate.readasciitable#
data_DataFrame, data_array = scientimate.readasciitable(file_name, file_directory=None, \
column_delimiter=None, \
skip_beginning_rows=0, header_row=None, unit_row='no', \
output_start_row=0, output_end_row=-1, \
output_start_column=0, output_end_column=-1, output_columns=None, \
drop_duplicate='no', find_missing_rows='no', interpolate_nan='no', nan_values=None, \
index_column=False, \
dispout='no')
Description#
Read and extract data from ASCII, text, Comma Separated Values (CSV), and spreadsheet (xlsx, xls, ods) file
Inputs#
- file_name
- Name of data file between ‘ ‘ mark, example: ‘data.csv’Acceptable file types are:ASCII file, example: ‘data.xyz’Text file, example: ‘data.txt’Comma Separated Values (CSV) file, example: ‘data.csv’
- file_directory=None
Location of data file between ‘ ‘ mark, example: r’C:’
- column_delimiter=None
- Character (delimiter) that separates column from each other between ‘ ‘ mark, example: ‘,’For column_delimiter=None, it uses default delimiterFor ASCII or text file, if data are separated by a single space, use ‘ ‘For ASCII or text file, if data are separated by tab, use ‘t’For ASCII or text file, if data are separated by comma, use ‘,’For CSV (comma-separated values) file, use ‘,’Use ‘s+’ for white space longer than 1
- skip_beginning_rows=0
Number of rows to be skipped at the beginning of the file
- header_row=None
- Index of a row that contains column headersExample: header_row=0 means 1st line is a header row
- unit_row=’no’
Define if there is a unit row after a header row or not | ‘yes’: there is a unit row after header row | ‘no’: there is not a unit row after header row
- output_start_row=0
- Index of first row to readRows will be extracted from output_start_row to output_end_row
- output_end_row=-1
- Index of last row to readRows will be extracted from output_start_row to output_end_row
- output_start_column=0
- Index of first column to readColumns will be extracted from output_start_column to output_end_column
- output_end_column=-1
- Index of last column to readColumns will be extracted from output_start_column to output_end_column
- output_columns=None
- List of column indexes to be extractedNone: columns will be extracted from output_start_column to output_end_columnIf output_columns is not None then output_start_column and output_end_column are ignoredExample: output_columns=[0,1] means 1st and 2nd columns are returned
- drop_duplicate=’no’
- Drop duplicate values‘yes’: it drops duplicate values‘no’: it does not drop duplicate values
- find_missing_rows=’no’
- Find missing rows (indexes) and insert them as empty rows‘yes’: it find missing dates‘no’: it does not find missing dates
- interpolate_nan=’no’
- Interpolate and replace NaN values‘yes’: it replaces NaN values‘no’: it does not replace NaN values
- nan_values=None
List of values to be considered as NaN
- index_column=False
Index of a column that contains row labels
- dispout=’no’
- Display outputs‘yes’: it displays outputs‘no’: it does not display outputs
Outputs#
- data_DataFrame
data extracted from input data file as Pandas DataFrame
- data_array
data extracted from input data file as NumPy array
Examples#
import scientimate as sm
import os
#First example
#Download NDBC data
station_ID='42035'
data_type='historical'
data_year=2019
data_month=None
file_directory=os.getcwd()
station_url, station_real_time_url, station_hist_data_url = sm.downloadndbcdata(station_ID, data_type, data_year, data_month, file_directory)
#Read NDBC data
file_name = station_ID+'h'+str(data_year)+'.txt'
file_directory = os.getcwd() #Example: r'C:'
column_delimiter = '\s+'
skip_beginning_rows = 0
header_row = 0
unit_row = 'yes'
output_start_row = 0
output_end_row = -1
output_start_column = 0
output_end_column = -1
output_columns = [5,6,8,9,10,17]
drop_duplicate = 'yes'
find_missing_rows = 'yes'
interpolate_nan = 'yes'
nan_values = [99.00,999,999.0,99.0]
index_column = False
dispout = 'yes'
data_DataFrame, data_array=sm.readasciitable(file_name, file_directory=file_directory, \
column_delimiter=column_delimiter, \
skip_beginning_rows=skip_beginning_rows, header_row=header_row, unit_row=unit_row, \
output_start_row=output_start_row, output_end_row=output_end_row, \
output_start_column=output_start_column, output_end_column=output_end_column, output_columns=output_columns, \
drop_duplicate=drop_duplicate, find_missing_rows=find_missing_rows, interpolate_nan=interpolate_nan, nan_values=nan_values, \
index_column=index_column, \
dispout=dispout)
#Second example
#Download Tide and Current data
station_ID='8761724'
begin_date='20190201'
end_date='20190301'
export_parameter='water_level'
interval='h'
units='metric'
datum='STND'
time_zone='gmt'
file_directory=os.getcwd()
station_information_json, station_datum_json, station_sensors_json, station_url = sm.downloadtidecurrentdata(station_ID, begin_date, end_date, export_parameter, interval, units, datum, time_zone, file_directory)
#Read Tide and Current data
file_name = 'CO-OPS_'+station_ID+'_'+export_parameter+'_'+begin_date+'_'+end_date+'.csv'
file_directory = os.getcwd() #Example: r'C:'
column_delimiter = ','
skip_beginning_rows = 0
header_row = 0
unit_row = 'no'
output_start_row = 0
output_end_row = -1
output_start_column = 0
output_end_column = -1
output_columns = [1]
drop_duplicate = 'yes'
find_missing_rows = 'yes'
interpolate_nan = 'yes'
nan_values = ['NaN']
index_column = False
dispout = 'yes'
data_DataFrame, data_array=sm.readasciitable(file_name, file_directory=file_directory, \
column_delimiter=column_delimiter, \
skip_beginning_rows=skip_beginning_rows, header_row=header_row, unit_row=unit_row, \
output_start_row=output_start_row, output_end_row=output_end_row, \
output_start_column=output_start_column, output_end_column=output_end_column, output_columns=output_columns, \
drop_duplicate=drop_duplicate, find_missing_rows=find_missing_rows, interpolate_nan=interpolate_nan, nan_values=nan_values, \
index_column=index_column, \
dispout=dispout)