Dav*_*vid 1 mysql unidata multivalue u2 multivalue-database
我正在尝试将数据从多值数据库(Unidata)导出到 MySQL。假设我的源数据是一个人的 ID 号、姓名以及他们居住过的所有州。州字段是一个多值字段,我在导出时用 ~ 分隔该字段中的各个值。样本数据如下:
"1234","Sally","NY~NJ~CT"
"1235","Dave","ME~MA~FL"
"3245","Fred","UT~CA"
"2344","Sue","OR"
Run Code Online (Sandbox Code Playgroud)
我已将此数据加载到临时表中
Table:staging
Column 1: personId
Column 2: name
Column 3: states
Run Code Online (Sandbox Code Playgroud)
我想要做的是使用一个存储过程将这些数据拆分到两个表中:一个人员表和一个州表。一个人在州表中可以有多条记录:
Table 1: persons
Column 1: id
Column 2: name
Table 2: states
Column 1: personId
Column 2: state
Run Code Online (Sandbox Code Playgroud)
我的存储过程可以从临时表中获取数据并顺利写入表 1。但是,我不知道该如何拆分数据并将其写入表 2。Sally 需要在州表中有三条记录(NY、NJ、CT),Dave 有 3 条,Fred 有 2 条,Sue 有 1 条(OR)。关于如何实现这一点,有什么想法吗?
尝试这样的事情:http://pastie.org/1213943
-- TABLES
-- Landing table for the raw extract: one row per person, with the
-- '~'-delimited list of state codes still packed into states_csv.
drop table if exists staging;

create table staging
(
    person_id  int unsigned  not null,
    name       varchar(255)  not null,
    states_csv varchar(1024) null,      -- e.g. 'NY~NJ~CT'
    primary key (person_id)
)
engine = innodb;
-- Normalised person table; person_id is carried over unchanged from staging.
drop table if exists persons;

create table persons
(
    person_id int unsigned not null,
    name      varchar(255) not null,
    primary key (person_id)
)
engine = innodb;
-- Lookup table of distinct state codes seen in the staging data.
drop table if exists states;

create table states
(
    state_id   tinyint unsigned not null auto_increment, -- new integer surrogate key
    state_code varchar(3)       not null,                -- original state code from staging
    name       varchar(255)     null,
    primary key (state_id),
    unique key (state_code)
)
engine = innodb;
-- Junction table linking persons to the states they are associated with.
--
-- The primary key leads with state_id, which favours queries such as
-- "select all the people from NY". Depending on your queries you might
-- prefer (person_id, state_id) as the primary key instead; the secondary
-- index on person_id covers lookups in the other direction.
drop table if exists person_states;

create table person_states
(
    state_id  tinyint unsigned not null,
    person_id int unsigned     not null,
    primary key (state_id, person_id),
    index (person_id)
)
engine = innodb;
-- STORED PROCEDURES
drop procedure if exists load_staging_data;
delimiter #
-- load_staging_data()
--
-- Empties the staging table and reloads it with a fixed set of sample rows.
-- In a real import this step would be replaced by a bulk load of the
-- extract file (e.g. load data infile) run outside the procedure.
--
-- Takes no parameters and returns no result set.
create procedure load_staging_data()
proc_main:begin
    -- truncate table causes an implicit commit, so it is kept outside the
    -- explicit transaction below.
    truncate table staging;

    -- start transaction suspends autocommit only until the commit, so the
    -- caller's session autocommit setting is left untouched (the previous
    -- "set autocommit = 0" stayed in effect after the procedure returned).
    start transaction;

    -- Explicit column list keeps the insert valid if staging gains columns.
    insert into staging (person_id, name, states_csv) values
        (1234, 'Sally', 'NY~NJ~CT'),
        (1235, 'Dave',  'ME~MA~FL'),
        (3245, 'Fred',  'UT~CA'),
        (2344, 'Sue',   'OR'),
        (5555, 'f00',   'OR~NY');

    commit;
end proc_main #
delimiter ;
drop procedure if exists cleanse_map_staging_data;
delimiter #
-- cleanse_map_staging_data()
--
-- Normalises the staging table into persons, states and person_states:
--   1. copies every (person_id, name) pair into persons;
--   2. walks staging row by row, splits states_csv on '~', and for each
--      non-empty code makes sure a states row exists and links it to the
--      person in person_states.
--
-- All inserts use "insert ignore", so re-running the procedure over the same
-- staging data does not create duplicates.
--
-- Cleansing rules applied to each code: upper-cased and trimmed. Empty
-- tokens (e.g. 'NY~~CT' or a trailing '~') are skipped rather than ending
-- the split. Tokens longer than 3 characters do not fit states.state_code
-- and are skipped.
--
-- Takes no parameters and returns no result set.
create procedure cleanse_map_staging_data()
proc_main:begin
    declare v_cursor_done tinyint unsigned default 0;

    -- watch out for variable names that have the same names as fields !!
    declare v_person_id  int unsigned;
    declare v_states_csv varchar(1024);
    declare v_token      varchar(1024);     -- one raw piece of v_states_csv
    declare v_state_id   tinyint unsigned;
    declare v_csv_len    int unsigned;      -- length of v_states_csv in characters
    declare v_start      int unsigned;      -- 1-based position where the next token starts
    declare v_delim_pos  int unsigned;      -- position of the next '~' (or one past the end)

    declare v_staging_cur cursor for
        select person_id, states_csv
        from staging
        order by person_id;

    -- Only the cursor fetch is allowed to raise NOT FOUND in this procedure;
    -- the state lookup below uses a scalar subquery so it cannot trip this
    -- handler and end the loop early.
    declare continue handler for not found set v_cursor_done = 1;

    -- do the person data
    -- (start transaction rather than "set autocommit = 0" so the caller's
    -- session setting is not changed as a side effect)
    start transaction;
    insert ignore into persons (person_id, name)
    select person_id, name
    from staging
    order by person_id;
    commit;

    -- the state lists have to be split row by row, hence the cursor
    start transaction;
    open v_staging_cur;

    staging_loop: loop
        fetch v_staging_cur into v_person_id, v_states_csv;

        -- Test the flag straight after the fetch: when the cursor is
        -- exhausted the variables still hold the previous row and must not
        -- be processed again.
        if v_cursor_done then
            leave staging_loop;
        end if;

        -- clean up the data; a null list is treated as an empty one
        set v_states_csv = upper(trim(coalesce(v_states_csv, '')));
        set v_csv_len    = char_length(v_states_csv);
        set v_start      = 1;

        while v_start <= v_csv_len do
            set v_delim_pos = locate('~', v_states_csv, v_start);
            if v_delim_pos = 0 then
                -- no further delimiter: the token runs to the end of the string
                set v_delim_pos = v_csv_len + 1;
            end if;

            set v_token = trim(substring(v_states_csv, v_start, v_delim_pos - v_start));

            -- Advance from the delimiter position, not from the trimmed token
            -- length, so padding around a code cannot misalign the next token.
            set v_start = v_delim_pos + 1;

            if char_length(v_token) between 1 and 3 then
                -- add the state if it doesnt already exist
                insert ignore into states (state_code) values (v_token);

                -- state_code is unique, so this yields one value, or null
                -- when the row could not be created
                set v_state_id = (select state_id from states where state_code = v_token);

                -- add the person state
                if v_state_id is not null then
                    insert ignore into person_states (state_id, person_id)
                    values (v_state_id, v_person_id);
                end if;
            end if;
        end while;
    end loop staging_loop;

    close v_staging_cur;
    commit;
end proc_main #
delimiter ;
-- TESTING
-- Reload the sample data, run the normalisation, then list every table.
-- Each query names its columns and orders by the table's key so the output
-- is stable from run to run.
call load_staging_data();

select person_id, name, states_csv
from staging
order by person_id;

call cleanse_map_staging_data();

select state_id, state_code, name
from states
order by state_id;

select person_id, name
from persons
order by person_id;

select state_id, person_id
from person_states
order by state_id, person_id;
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
8652 次 |
| 最近记录: |