I’ve spent some time recently playing with PL/SQL arrays in the context of uploading from flat-files.
In the course of this, it struck me that PL/SQL arrays come in a variety of shapes and sizes ( or in this case, small, medium and large).
So, if Sir – or Madam – would care to step into the fitting room, we’ll see if we can find something to suit.
At this point, it’s probably worth referring you to the post about UTL_FILE as we’ll be re-using the file generated in that post for these examples.
For the rest of this article, I’m going to assume that we know the column order of the file we’re reading and that the data is being uploaded into a holding table with the same column order.
Small and Sociable
When you’re only processing a small number of records, especially one row at a time inserts, it’s quite nice to be able to make use of an associative array, if only to help make the code a bit more readable.
There’s a post here which explains the concept.
So, if we apply this to the one-at-a-time insert from a flat-file, we can replace the tbl_cols(n) notation of the table indexed by a PLS_INTEGER ( so last year, darling !) with the slightly more meaningful tbl_cols(‘column_name’) :
set serveroutput on size 1000000
--
-- Script to read the emp_em.csv file and insert the contents
-- into the SPAM_TARGETS table - this time using an associative array
--
DECLARE
l_fp UTL_FILE.FILE_TYPE;
l_dir VARCHAR2(30) := 'MYFILES';
l_fname VARCHAR2(30) := 'emp_em.csv';
l_buffer VARCHAR2(100);
--
-- Variable for doing the substring thing
--
l_start PLS_INTEGER := 1;
l_pos PLS_INTEGER;
--
-- PL/SQL table to hold the individual data items in each row
-- this time indexed by VARCHAR2
TYPE typ_cols IS TABLE OF VARCHAR2(30) INDEX BY VARCHAR2(10);
tbl_cols typ_cols;
l_col_no PLS_INTEGER;
l_count PLS_INTEGER := 0;
BEGIN
--
-- Open the file, but this time we want to READ from it
--
l_fp := UTL_FILE.FOPEN( location => l_dir, filename => l_fname,
OPEN_MODE => 'R');
IF NOT UTL_FILE.IS_OPEN(l_fp) THEN
DBMS_OUTPUT.PUT_LINE('Unable to open file.');
RETURN;
END IF;
--
-- Need to discard the first line because it's got the header record.
--
UTL_FILE.GET_LINE(l_fp, l_buffer);
DBMS_OUTPUT.PUT_LINE('Got the header : ');
DBMS_OUTPUT.PUT_LINE(l_buffer);
--
-- Now loop through the rest of the file and insert each record into
-- the table
--
LOOP
BEGIN
--
-- Need this nested block
-- because we'll get a NO_DATA_FOUND error when we've
-- read past the last line of the file and we need to
-- handle it here.
--
UTL_FILE.GET_LINE( l_fp, l_buffer);
IF l_buffer IS NULL THEN
--
-- End of the file so exit the loop
--
EXIT;
END IF;
--
-- Split the line back into it's component data items
--
l_col_no := 1;
l_start := 1;
WHILE INSTR( l_buffer, ',', 1,l_col_no) != 0 LOOP
--
-- Loop through the line to get all the values
-- Don't really need a PL/SQL table for 3 columns, but if there's
-- a lot more it may well save you some typing
--
l_pos := INSTR( l_buffer, ',', 1, l_col_no);
IF l_col_no = 1 THEN
tbl_cols( 'first_name') :=
SUBSTR( l_buffer, l_start, l_pos - l_start);
ELSIF l_col_no = 2 THEN
tbl_cols( 'last_name') :=
SUBSTR( l_buffer, l_start, l_pos - l_start);
END IF;
l_start := l_pos + 1;
l_col_no := l_col_no + 1;
END LOOP;
--
-- Get the last value on the line ( it won't have a ',' at the end)
--
l_start := l_pos + 1;
tbl_cols('email') := SUBSTR( l_buffer, l_start);
--
-- Now insert the record
--
INSERT INTO spam_targets( first_name, last_name, email)
VALUES( tbl_cols('first_name'), tbl_cols('last_name'),
tbl_cols('email'));
l_count := l_count + 1;
DBMS_OUTPUT.PUT_LINE('Insert done - '||l_count);
EXCEPTION WHEN NO_DATA_FOUND THEN
--
-- We've read past the end of the file
--
EXIT;
END;
END LOOP;
UTL_FILE.FCLOSE( l_fp);
END;
/
Big and Bulky
You’re looking for something a bit more weighty, as you’ve got some pretty large amounts of data to deal with ? OK, let’s try the brute-force batch job approach :
set serveroutput on size 1000000
--
-- Script to read the emp_em.csv file and insert the contents
-- into the SPAM_TARGETS table - this time using a table of records
-- and the forall statement
--
DECLARE
l_fp UTL_FILE.FILE_TYPE;
l_dir VARCHAR2(30) := 'MYFILES';
l_fname VARCHAR2(30) := 'emp_em.csv';
l_buffer VARCHAR2(100);
--
-- Variable for doing the substring thing
--
l_start PLS_INTEGER := 1;
l_pos PLS_INTEGER;
--
-- PL/SQL table setup
--
TYPE typ_st IS TABLE OF spam_targets%ROWTYPE INDEX BY PLS_INTEGER;
tbl_st typ_st;
l_col_no PLS_INTEGER;
l_count PLS_INTEGER := 0;
BEGIN
--
-- Open the file to Read from
--
l_fp := UTL_FILE.FOPEN( location => l_dir, filename => l_fname,
OPEN_MODE => 'R');
IF NOT UTL_FILE.IS_OPEN(l_fp) THEN
DBMS_OUTPUT.PUT_LINE('Unable to open file.');
RETURN;
END IF;
--
-- Need to discard the first line because it's got the header record.
--
UTL_FILE.GET_LINE(l_fp, l_buffer);
DBMS_OUTPUT.PUT_LINE('Got the header : ');
DBMS_OUTPUT.PUT_LINE(l_buffer);
--
-- Now loop through the rest of the file and populate the PL/SQL table
-- with the data line-by-line
--
LOOP
l_count := l_count + 1;
BEGIN
--
-- Need this nested block
-- because we'll get a NO_DATA_FOUND error when we've
-- read past the last line of the file and we need to
-- handle it here.
--
UTL_FILE.GET_LINE( l_fp, l_buffer);
IF l_buffer IS NULL THEN
--
-- End of the file so exit the loop
--
EXIT;
END IF;
--
-- Split the buffer into it's component data items
--
l_col_no := 1;
l_start := 1;
WHILE INSTR( l_buffer, ',', 1,l_col_no) != 0 LOOP
--
-- Loop through the line to get all the values
-- Don't really need a PL/SQL table for 3 columns, but if there's
-- a lot more it may well save you some typing
--
l_pos := INSTR( l_buffer, ',', 1, l_col_no);
IF l_col_no = 1 THEN
tbl_st(l_count).first_name :=
SUBSTR( l_buffer, l_start, l_pos - l_start);
ELSIF l_col_no = 2 THEN
tbl_st(l_count).last_name :=
SUBSTR( l_buffer, l_start, l_pos - l_start);
END IF;
l_start := l_pos + 1;
l_col_no := l_col_no + 1;
END LOOP;
--
-- Get the last value on the line ( it won't have a ',' at the end)
--
l_start := l_pos + 1;
tbl_st(l_count).email := SUBSTR( l_buffer, l_start);
EXCEPTION WHEN NO_DATA_FOUND THEN
--
-- We've read past the end of the file
--
EXIT;
END;
END LOOP;
--
-- Now do a forall insert using the contents of the PL/SQL table
--
FORALL i IN 1..tbl_st.COUNT
INSERT INTO spam_targets VALUES tbl_st(i);
END;
/
Using forall to do a bulk insert can be quite effective in performance terms, but there are some potential pit-falls with letting your collection grow unchecked.
Not least of these is that the collection resides in PGA and if you don’t control it’s size programmatically, it could potentially cause problems system-wide as it hogs all of the available PGA memory.
Fortunately, there is a variation which may fit the bill…
Controlled and Chunky
Here, we’re going to make sure that the table never gets beyond a set number of records. We can set the l_max_size constant depending on the number and size of the records, whether this is a batch job etc …
set serveroutput on size 1000000
--
-- Script to read the emp_em.csv file and insert the contents
-- into the SPAM_TARGETS table - this time using a table of records
-- and the forall statement AND splitting the insert so the
-- array doesn't take up too much PGA
--
DECLARE
l_fp UTL_FILE.FILE_TYPE;
l_dir VARCHAR2(30) := 'MYFILES';
l_fname VARCHAR2(30) := 'emp_em.csv';
l_buffer VARCHAR2(100);
--
-- Variable for doing the substring thing
--
l_start PLS_INTEGER := 1;
l_pos PLS_INTEGER;
--
-- PL/SQL table setup
--
l_max_size CONSTANT PLS_INTEGER := 25;
TYPE typ_st IS TABLE OF spam_targets%ROWTYPE INDEX BY PLS_INTEGER;
tbl_st typ_st;
l_col_no PLS_INTEGER;
l_count PLS_INTEGER := 0;
BEGIN
--
-- Open the file to Read from
--
l_fp := UTL_FILE.FOPEN( location => l_dir, filename => l_fname,
OPEN_MODE => 'R');
IF NOT UTL_FILE.IS_OPEN(l_fp) THEN
DBMS_OUTPUT.PUT_LINE('Unable to open file.');
RETURN;
END IF;
--
-- Need to discard the first line because it's got the header record.
--
UTL_FILE.GET_LINE(l_fp, l_buffer);
DBMS_OUTPUT.PUT_LINE('Got the header : ');
DBMS_OUTPUT.PUT_LINE(l_buffer);
--
-- Now loop through the rest of the file and populate the PL/SQL table
-- with the data line-by-line
--
LOOP
l_count := l_count + 1;
BEGIN
--
-- Need this nested block
-- because we'll get a NO_DATA_FOUND error when we've
-- read past the last line of the file and we need to
-- handle it here.
--
UTL_FILE.GET_LINE( l_fp, l_buffer);
IF l_buffer IS NULL THEN
--
-- End of the file so exit the loop
--
EXIT;
END IF;
--
-- Split the buffer into it's component data items
--
l_col_no := 1;
l_start := 1;
WHILE INSTR( l_buffer, ',', 1,l_col_no) != 0 LOOP
--
-- Loop through the line to get all the values
-- Don't really need a PL/SQL table for 3 columns, but if there's
-- a lot more it may well save you some typing
--
l_pos := INSTR( l_buffer, ',', 1, l_col_no);
IF l_col_no = 1 THEN
tbl_st(l_count).first_name :=
SUBSTR( l_buffer, l_start, l_pos - l_start);
ELSIF l_col_no = 2 THEN
tbl_st(l_count).last_name :=
SUBSTR( l_buffer, l_start, l_pos - l_start);
END IF;
l_start := l_pos + 1;
l_col_no := l_col_no + 1;
END LOOP;
--
-- Get the last value on the line ( it won't have a ',' at the end)
--
l_start := l_pos + 1;
tbl_st(l_count).email := SUBSTR( l_buffer, l_start);
EXCEPTION WHEN NO_DATA_FOUND THEN
--
-- We've read past the end of the file
--
EXIT;
END;
--
-- Check the number of records in the array. If it's reached
-- our limit then insert and cleardown
--
IF l_count = l_max_size THEN
DBMS_OUTPUT.PUT_LINE('Inserting '||l_max_size||' records.');
FORALL i IN 1..tbl_st.COUNT
INSERT INTO spam_targets VALUES tbl_st(i);
tbl_st.DELETE;
l_count := 0;
END IF;
END LOOP;
--
-- Now check to see if there are any records left in the array
--
IF tbl_st.COUNT > 0 THEN
DBMS_OUTPUT.PUT_LINE(tbl_st.COUNT||' records to insert');
FORALL j IN 1..tbl_st.COUNT
INSERT INTO spam_targets VALUES tbl_st(j);
END IF;
END;
/
In keeping with the fashion-designer theme – that’s all for now darling, simply must dash. Mwah ! Mwah!