use*_*802 5 sql t-sql sql-server
我想拆分URL,取得其中的各个部分(domain、category、subcategory等),并把每一部分插入到表中。例如:
"www.mydomain.com/toolsanddownloads/dailymealplanner.html?languageid=6"
Run Code Online (Sandbox Code Playgroud)
目的是在页面不存在时执行404重定向。我正在尝试使用CTE编写SQL语句并获取域的每个部分
-- Attempt to split each URL in [dbo].[404RedirectTemp] into domain, category and
-- query-string parts using fixed offsets and CHARINDEX/REVERSE arithmetic.
-- The CTE normalises the URL: a trailing '/' or a trailing '.html' is removed;
-- any other URL (including one that ends in a query string) passes through unchanged.
;with cte AS
(
SELECT
CASE
WHEN RIGHT(RTRIM(URL),1) = '/' THEN LEFT(URL,LEN(URL)-1)
WHEN RIGHT(RTRIM(URL),5) = '.html' THEN LEFT(URL,LEN(URL)-5)
ELSE URL
END AS URL1,
-- Position just after the first '//'. NOTE(review): this column is never read by
-- the outer SELECT below, which uses hard-coded offsets instead.
StartPos = CharIndex('//', URL)+2
FROM [dbo].[404RedirectTemp]
)
-- DomainName: text from character 8 up to the first '/' found at or after character 9.
-- NOTE(review): the fixed offset 8 presumably assumes a 7-character 'http://' prefix;
-- URLs without a scheme (e.g. 'www.mydomain.com/...') will be cut incorrectly, and if
-- no '/' exists from position 9 onward CHARINDEX returns 0, making the length negative,
-- which SUBSTRING rejects with an error.
SELECT URL1, SUBSTRING(URL1, 8, CHARINDEX('/', URL1, 9) - 8) AS DomainName,
-- CategoryName: works on the reversed string, taking the text between the last '?'
-- (or the end of the string when there is no '?') and the last '/'. This is always the
-- LAST path segment, not the first segment after the domain.
REVERSE(SUBSTRING(REVERSE(URL1), CHARINDEX('?', REVERSE(URL1)) + 1,
CHARINDEX('/', REVERSE(URL1)) - CHARINDEX('?', REVERSE(URL1)) -1)) AS CategoryName,
-- QueryParameter: everything after the first '?'. When the URL has no '?', CHARINDEX
-- returns 0, so the start position is 1 and the whole URL1 is returned.
SUBSTRING(URL1, CHARINDEX('?', URL1) + 1, LEN(URL1)) AS QueryParameter
FROM cte;
Run Code Online (Sandbox Code Playgroud)
我得到的类别名称总是URL的最后一段,这是错误的,因为有些URL是 http://www.mydomain.com/toolsanddownloads/dailymealplanner.html?languageid=6
一些
"www.mydomain.com/toolsanddownloads"
"www.mydomain.com/toolsanddownloads/dailymealplanner.html"
Run Code Online (Sandbox Code Playgroud)
我想要实现的是:不管URL有多少段,都把它们拆分成列:domain、categories、subcategories、brand、product
如果URL只有类别,就只取类别;如果同时有类别和子类别,就把类别和子类别都取出来
我在临时表中有4000多个URL,我想遍历每个URL并更新另一张表以进行404重定向
转换为行并像数组索引一样处理怎么样?例如:
让我们设置示例环境
-- Sample fixture: a session-scoped temp table holding one raw URL per row.
-- The four rows cover: a URL with a scheme and several path segments, a bare
-- domain + category, a URL with a query string, and a URL ending in '.html'.
CREATE TABLE #url
(
    id  INT,
    url VARCHAR(500)
);

INSERT INTO #url (id, url)
VALUES (1, 'http://stackoverflow.com/questions/18660573/split-url-using-sql-and-add-to-database');

INSERT INTO #url (id, url)
VALUES (2, 'www.mydomain.com/toolsanddownloads');

INSERT INTO #url (id, url)
VALUES (3, 'www.mydomain.com/toolsanddownloads?test=2&b=4');

INSERT INTO #url (id, url)
VALUES (4, 'www.mydomain.com/toolsanddownloads/dailymealplanner.html');
Run Code Online (Sandbox Code Playgroud)
清理一下数据(临时表上可能会留下原始日志)
-- Normalise every URL in #url so it can be split on '/':
--   1. drop a leading scheme ('https://' as well as 'http://'), otherwise the
--      scheme would become its own segment and shift every depth;
--   2. turn '?' into '/^' so the query string becomes its own '/'-delimited
--      segment, marked with a leading '^';
--   3. turn '&' into '^' ('&' is not valid inside the XML built by the split query).
-- Deliberately has no WHERE clause: the whole temp table is rewritten, in one pass.
UPDATE #url
SET url = REPLACE(
              REPLACE(
                  REPLACE(
                      REPLACE(url, 'https://', ''),
                      'http://', ''),
                  '?', '/^'),
              '&', '^');
Run Code Online (Sandbox Code Playgroud)
现在是有趣的部分
-- Split each cleaned URL in #url into one row per '/'-delimited segment and
-- label each segment by its position within the URL.
--
-- rslt: the URL is wrapped as <i>seg1</i><i>seg2</i>... and shredded with
-- nodes(); depth is the 1-based position of the segment within its URL.
-- NOTE(review): depth relies on nodes() returning rows in document order under
-- ORDER BY (SELECT 1); this is the commonly observed behaviour — confirm it is
-- acceptable for your SQL Server version.
-- Assumes the URL has already been cleaned ('&' replaced, scheme removed);
-- characters that are invalid in XML (e.g. '&', '<') make the CONVERT fail.
WITH rslt AS (
    SELECT
        a.id,
        ROW_NUMBER() OVER (PARTITION BY a.id ORDER BY (SELECT 1)) AS depth,
        y.i.value('.', 'nvarchar(4000)') AS value
    FROM (
        SELECT
            id,
            CONVERT(XML, '<i>'
                + REPLACE(url, '/', '</i><i>')
                + '</i>').query('.') AS x
        FROM #url
    ) AS a
    CROSS APPLY a.x.nodes('i') AS y(i)
)
SELECT
    -- Segments starting with '^' are the query string (marked by the cleanup
    -- step); all others are named by position. Path segments deeper than 5
    -- get a NULL section.
    CASE
        WHEN value LIKE '^%' THEN 'querystring'
        WHEN depth = 1 THEN 'Domain'
        WHEN depth = 2 THEN 'categories'
        WHEN depth = 3 THEN 'subcategories'
        WHEN depth = 4 THEN 'brand'
        WHEN depth = 5 THEN 'product'
    END AS section,
    -- Strip a file extension ('.html' etc.) from path segments only: the domain
    -- (depth 1) keeps its dots, and query strings are left intact so values
    -- such as '^v=1.5' are not truncated.
    CASE
        WHEN depth > 1
             AND value NOT LIKE '^%'
             AND CHARINDEX('.', value) > 0
            THEN LEFT(value, CHARINDEX('.', value) - 1)
        ELSE value
    END AS value
FROM rslt
-- Keep each URL's segments together and in path order.
ORDER BY id, depth;
Run Code Online (Sandbox Code Playgroud)
结果如下:
Domain stackoverflow.com
categories questions
subcategories 18660573
brand split-url-using-sql-and-add-to-database
Domain www.mydomain.com
categories toolsanddownloads
Domain www.mydomain.com
categories toolsanddownloads
querystring ^test=2^b=4
Domain www.mydomain.com
categories toolsanddownloads
subcategories dailymealplanner
Run Code Online (Sandbox Code Playgroud)